---
title: "Uneven Paths: Recovery in Louisiana Parishes after Hurricanes Katrina and Rita"
subtitle: "Replication Script"
author: "Tim Fraser, Alex Poniatowski, Nick Hersey, Haoran Zheng, & Daniel P. Aldrich"
date: "February 25, 2021"
output:
  html_document:
    df_print: kable
    toc: yes
  pdf_document:
    toc: yes
---


# 1. Packages

First, we must download and load the packages for this analysis. We rely on the **gsynth** package for our analysis, the newest and most state of the art package for synthetic control experiments, developed by Yiqing Xu in 2017. 


```{r, message = FALSE, warning = FALSE}
# Load the following packages
library(tidyverse) # for data manipulation
library(readxl) # for excel files
library(viridis) # colors
library(ggpubr) # arranging plots
library(texreg) # for model tables

#install.packages(c("tidyverse", "viridis", "texreg","ggpubr", "sf", "rgdal", "tidygraph", "ggraph"))

# if not already installed, 
# must install gsynth and panelView packages FROM GITHUB, not CRAN
#install.packages('devtools', repos = 'http://cran.us.r-project.org')


##devtools::install_github('xuyiqing/gsynth') # doesn't always work
#remotes::install_github("xuyiqing/gsynth")
#devtools::install_github('xuyiqing/panelView')   # if not already installed

library(gsynth)
library(panelView)
library(knitr)

#install.packages(c("kableExtra", "ggtext"))

# For a step-by-step example,
# Please consult Yiqing Xu's excellent tutorial here:
# https://yiqingxu.org/software/gsynth/gsynth_examples.html

#remotes::install_github("luukvdmeer/sfnetworks")
library(sf)
library(rgdal)
library(tidygraph)
library(ggraph)
library(sfnetworks)
```



# 2. Migration Data Builder

## 2.1 Extract data

First, let's write a function to extract IRS county-pair migration data from their datasets from 1992 until 2018.

```{r, message = FALSE, warning = FALSE}
library(tidyverse)
library(readxl)


extract = function(file){
  #INFLOW = to, from, 

  # If the file is an .xlsx file,
  if(file %>% str_detect(pattern = ".xls") == TRUE &
     file %>% str_detect(pattern = "in") == TRUE){
    
    out = read_xls(path = paste("migration/", file, sep = ""), skip = 7) %>%
      select(state_to = 1,
             county_to = 2,
             state_from = 3,
             county_from = 4,
             returns = 7,
             indiv_exemptions = 8,
             income = 9) %>%
      # Convert to double format
      mutate_all(funs(as.double)) %>% 
      # Convert state and county ID codes to 2 and 3 character strings
      mutate_at(vars("state_to", "state_from"), funs(
        str_pad(., width = 2, side = "left", pad = "0"))) %>%
      mutate_at(vars("county_to", "county_from"), funs(
        str_pad(., width = 3, side = "left", pad = "0"))) %>%
            # Specify the type (in/out migration) and year
      mutate(
        type = file %>% str_extract(pattern = "in|out"),
        year_from = file %>% 
          str_extract(pattern = "[0-9]{4}") %>% str_sub(1,2),
        year_to = file %>% 
          str_extract(pattern = "[0-9]{4}") %>% str_sub(3,4)) %>%
     # Now reformat the years
     mutate(
       year_from = if_else(year_from %>% as.numeric < 50,
        # Put the number in the 2000s
        year_from %>% paste("20", ., sep = ""),
        # If greater than 50, then put the number in the 1900s
        year_from %>% paste("19", ., sep = "")),
       
       year_to = if_else(year_to %>% as.numeric < 50,
        # Put the number in the 2000s
        year_to %>% paste("20", ., sep = ""),
        # If greater than 50, then put the number in the 1900s
        year_to %>% paste("19", ., sep = ""))
     )
  }
  
  #  OUTFLOW = from, to
  # If the file is an .xlsx file,
  if(file %>% str_detect(pattern = ".xls") == TRUE &
     file %>% str_detect(pattern = "out") == TRUE){
    
    out = read_xls(path = paste("migration/", file, sep = ""), skip = 7) %>%
      select(state_from = 1,
             county_from = 2,
             state_to = 3,
             county_to = 4,
             returns = 7,
             indiv_exemptions = 8,
             income = 9) %>%
      # Convert to double format
      mutate_all(funs(as.double)) %>% 
      # Convert state and county ID codes to 2 and 3 character strings
      mutate_at(vars("state_to", "state_from"), funs(
        str_pad(., width = 2, side = "left", pad = "0"))) %>%
      mutate_at(vars("county_to", "county_from"), funs(
        str_pad(., width = 3, side = "left", pad = "0"))) %>%
            # Specify the type (in/out migration) and year
      mutate(
        type = file %>% str_extract(pattern = "in|out"),
        year_from = file %>% 
          str_extract(pattern = "[0-9]{4}") %>% str_sub(1,2),
        year_to = file %>% 
          str_extract(pattern = "[0-9]{4}") %>% str_sub(3,4)) %>%
     # Now reformat the years
     mutate(
       year_from = if_else(year_from %>% as.numeric < 50,
        # Put the number in the 2000s
        year_from %>% paste("20", ., sep = ""),
        # If greater than 50, then put the number in the 1900s
        year_from %>% paste("19", ., sep = "")),
       
       year_to = if_else(year_to %>% as.numeric < 50,
        # Put the number in the 2000s
        year_to %>% paste("20", ., sep = ""),
        # If greater than 50, then put the number in the 1900s
        year_to %>% paste("19", ., sep = ""))
     )
  }
  
  # Metadata for the 2011+ files here:
  # https://www.irs.gov/pub/irs-soi/1112inpublicmigdoc.pdf
  #FOR OUTFLOW (ORIGIN) state_from = 1, (DESTINATION) state_to = 3; 
  #FOR INFLOW, (DESTINATION) state_to = 1, (ORIGIN) state_from = 3

  # If the file is a .csv and is about inmigration
  if(file %>% str_detect(pattern = ".csv") == TRUE &
     file %>% str_detect(pattern = "in") == TRUE){
  out = read_csv(paste("migration/", file, sep = "")) %>%
      select(state_to = 1,
             county_to = 2,
             state_from = 3,
             county_from = 4,
             returns = 7,
             indiv_exemptions = 8,
             income = 9) %>%
      # Filter just to people coming to Louisiana counties
      filter(state_to == "22") %>%
      # Convert all to double
      mutate_all(funs(as.double)) %>% 
      # Convert state and county back into character strings
      mutate_at(vars("state_to", "state_from"), funs(
        str_pad(., width = 2, side = "left", pad = "0"))) %>%
      mutate_at(vars("county_to", "county_from"), funs(
        str_pad(., width = 3, side = "left", pad = "0"))) %>%
            # Specify the type (in/out migration) and year
      mutate(
        type = file %>% str_extract(pattern = "in|out"),
        year_from = file %>% 
          str_extract(pattern = "[0-9]{4}") %>% str_sub(1,2),
        year_to = file %>% 
          str_extract(pattern = "[0-9]{4}") %>% str_sub(3,4)) %>%
     # Now reformat the years
     mutate(
       year_from = if_else(year_from %>% as.numeric < 50,
        # Put the number in the 2000s
        year_from %>% paste("20", ., sep = ""),
        # If greater than 50, then put the number in the 1900s
        year_from %>% paste("19", ., sep = "")),
       
       year_to = if_else(year_to %>% as.numeric < 50,
        # Put the number in the 2000s
        year_to %>% paste("20", ., sep = ""),
        # If greater than 50, then put the number in the 1900s
        year_to %>% paste("19", ., sep = ""))
     )
  }
  
    # Metadata for the 2011+ files here:
  # https://www.irs.gov/pub/irs-soi/1112inpublicmigdoc.pdf
  #FOR OUTFLOW (ORIGIN) state_from = 1, (DESTINATION) state_to = 3; 
  #FOR INFLOW, (DESTINATION) state_to = 1, (ORIGIN) state_from = 3

  # If the file is a .csv and focuses on outmigration
  if(file %>% str_detect(pattern = ".csv") == TRUE &
     file %>% str_detect(pattern = "out") == TRUE){
   out = read_csv(paste("migration/", file, sep = "")) %>%
      # Name columns based on file format
      select(state_from = 1,
             county_from = 2,
             state_to = 3,
             county_to = 4,
             returns = 7,
             indiv_exemptions = 8,
             income = 9) %>%
      # Filter just to people leaving from Louisiana counties
      filter(state_from == "22") %>%
      # Convert all to double
      mutate_all(funs(as.double)) %>%
      # Convert state and county back into character strings
      mutate_at(vars("state_to", "state_from"), funs(
        str_pad(., width = 2, side = "left", pad = "0"))) %>%
      mutate_at(vars("county_to", "county_from"), funs(
        str_pad(., width = 3, side = "left", pad = "0"))) %>%
      # Specify the type (in/out migration) and year
      mutate(
        type = file %>% str_extract(pattern = "in|out"),
        year_from = file %>% 
          str_extract(pattern = "[0-9]{4}") %>% str_sub(1,2),
        year_to = file %>% 
          str_extract(pattern = "[0-9]{4}") %>% str_sub(3,4)) %>%
     # Now reformat the years
     mutate(
       year_from = if_else(year_from %>% as.numeric < 50,
        # Put the number in the 2000s
        year_from %>% paste("20", ., sep = ""),
        # If greater than 50, then put the number in the 1900s
        year_from %>% paste("19", ., sep = "")),
       
       year_to = if_else(year_to %>% as.numeric < 50,
        # Put the number in the 2000s
        year_to %>% paste("20", ., sep = ""),
        # If greater than 50, then put the number in the 1900s
        year_to %>% paste("19", ., sep = ""))
     )

  }
  
  # Metadata for the 2017+ files here:
  # https://www.irs.gov/pub/irs-soi/1718inpublicmigdoc.pdf
  # FOR OUTFLOW (ORIGIN) state_from = 1, (DESTINATION) state_to = 3; 
  # FOR INFLOW, (DESTINATION) state_to = 1, (ORIGIN) state_from = 3

  # If the file is a .txt and focuses on inmigration
  if(file %>% str_detect(pattern = ".txt") == TRUE &
     file %>% str_detect(pattern = "in") == TRUE){
    out = read_delim(paste("migration/", file, sep =""), delim = ",") %>%
      # Name columns based on file format
      select(state_to = 1,
             county_to = 2,
             state_from = 3,
             county_from = 4,
             returns = 7,
             indiv_exemptions = 8,
             income = 9) %>%
      # Filter just to people leaving from Louisiana counties
      filter(state_to == "22") %>%
      # Convert all to double
      mutate_all(funs(as.double)) %>%
      # Convert state and county back into character strings
      mutate_at(vars("state_to", "state_from"), funs(
        str_pad(., width = 2, side = "left", pad = "0"))) %>%
      mutate_at(vars("county_to", "county_from"), funs(
        str_pad(., width = 3, side = "left", pad = "0"))) %>%
      # Specify the type (in/out migration) and year
      mutate(
        type = file %>% str_extract(pattern = "in|out"),
        year_from = file %>% 
          str_extract(pattern = "[0-9]{4}") %>% str_sub(1,2),
        year_to = file %>% 
          str_extract(pattern = "[0-9]{4}") %>% str_sub(3,4)) %>%
      # Now reformat the years
      mutate(
        year_from = if_else(
          year_from %>% as.numeric < 50,
          # Put the number in the 2000s
          year_from %>% paste("20", ., sep = ""),
          # If greater than 50, then put the number in the 1900s
          year_from %>% paste("19", ., sep = "")),
        
        year_to = if_else(
          year_to %>% as.numeric < 50,
          # Put the number in the 2000s
          year_to %>% paste("20", ., sep = ""),
          # If greater than 50, then put the number in the 1900s
          year_to %>% paste("19", ., sep = ""))
      )
    
  }
  
    # Metadata for the 2017+ files here:
  # https://www.irs.gov/pub/irs-soi/1718inpublicmigdoc.pdf
  # FOR OUTFLOW (ORIGIN) state_from = 1, (DESTINATION) state_to = 3; 
  # FOR INFLOW, (DESTINATION) state_to = 1, (ORIGIN) state_from = 3

  # If the file is a .txt and focuses on outmigration
    if(file %>% str_detect(pattern = ".txt") == TRUE &
     file %>% str_detect(pattern = "out") == TRUE){
    out = read_delim(paste("migration/", file, sep =""), delim = ",") %>%
      # Name columns based on file format
      select(state_from = 1,
             county_from = 2,
             state_to = 3,
             county_to = 4,
             returns = 7,
             indiv_exemptions = 8,
             income = 9) %>%
      # Filter just to people leaving from Louisiana counties
      filter(state_from == "22") %>%
      # Convert all to double
      mutate_all(funs(as.double)) %>%
      # Convert state and county back into character strings
      mutate_at(vars("state_to", "state_from"), funs(
        str_pad(., width = 2, side = "left", pad = "0"))) %>%
      mutate_at(vars("county_to", "county_from"), funs(
        str_pad(., width = 3, side = "left", pad = "0"))) %>%
      # Specify the type (in/out migration) and year
      mutate(
        type = file %>% str_extract(pattern = "in|out"),
        year_from = file %>% 
          str_extract(pattern = "[0-9]{4}") %>% str_sub(1,2),
        year_to = file %>% 
          str_extract(pattern = "[0-9]{4}") %>% str_sub(3,4)) %>%
      # Now reformat the years
      mutate(
        year_from = if_else(
          year_from %>% as.numeric < 50,
          # Put the number in the 2000s
          year_from %>% paste("20", ., sep = ""),
          # If greater than 50, then put the number in the 1900s
          year_from %>% paste("19", ., sep = "")),
        
        year_to = if_else(
          year_to %>% as.numeric < 50,
          # Put the number in the 2000s
          year_to %>% paste("20", ., sep = ""),
          # If greater than 50, then put the number in the 1900s
          year_to %>% paste("19", ., sep = ""))
      )
    
  }

  return(out)

}

```

## 2.2 Collect County-Pairs

Now let's run the function, generating our list of county-pair moves relevant to Louisiana counties.

```{r, message = FALSE, warning = FALSE}
library(readxl)

# First, let's record the kind of state codes we don't want in our data
not = c("57", # Flows Abroad
        "58", # Other Flows
        "59", # Other Flows
        "96", # US & Foreign
        "97", # US
        "98") # Foreign


# For each file in our directory,
data.frame(file = dir("migration")) %>% 
  # Filter out the data from 1990-1992, 
  # which was not formatted for this analysis.
  filter(!str_detect(file, "9091|9192")) %>% unlist() %>%
  # Run the extract function on each dataset, bind together
  map_dfr(~extract(.)) %>% 
  # Filter to just people coming to or from Louisiana
  filter(state_to == "22" | state_from == "22") %>%
  ungroup %>%
  # Convert state to character
  mutate(state_to = state_to %>% as.character() %>%
           str_pad(width = "2", side = "left", pad = "0"),
         state_from = state_from %>% as.character() %>%
           str_pad(width = "2", side = "left", pad = "0")) %>%
  # Remove any pairs that are about movments 
  # of the types described above
  filter(!state_to %in% not &
           !state_from %in% not) %>%
  # Remove any pairs that are state totals
  filter(county_to != "000" & county_from != "000") %>%
  # Create a FIPS code, which combines state and county codes
  mutate(
    fips_to = paste(
      state_to, 
      county_to, sep =""),
    fips_from = paste(
      state_from, 
      county_from, sep = "")) %>%
  # and remove any pairs that are loops - between the same state & county
  filter(fips_to != fips_from) %>%
    # Filter to look at filing years starting in taxyear 1995-1996 (year_to == 1996),
  # since they only started using gross adjusted income then.
  filter(year_to >= 1996) %>%
  # and export
  write_csv("data/migration.csv")
```



## 2.3 In & Outmigration Tallies

Now, let's calculate total in- and out-migration for each county, as estimated by the IRS based on tax filing.

Importantly, we need to filter out some data that will otherwise lead us to duplicate estimates. Here are some examples:

So when paired with a normal state, "000" means total, "001" means same state, and "003" means different state.

```{r}
# I want to know inmigration FROM other places TO LA
# state_to == Louisiana
# I want to know outmigration FROM LA TO other places
# state_from == Louisiana

bind_rows(
  # Record Inmigration to LA
  read_csv("data/migration.csv", 
           col_types = cols(
             "state_to" = col_character(),
             "fips_to" = col_character())) %>%
    filter(state_to == "22" & type == "in") %>%
    # Group by FIPS code and year of filing 
    # (eg. describing activities of prior year)
    group_by(fips = fips_to, year = year_to) %>%
    summarize(returns = sum(returns, na.rm = TRUE),
              indiv_exemptions = sum(indiv_exemptions, na.rm = TRUE),
              income = sum(income, na.rm = TRUE)) %>% 
    mutate(type = "in") %>% ungroup(),
  # Record Outmigration from LA
  read_csv("data/migration.csv", 
           col_types = cols(
             "state_to" = col_character(),
             "fips_to" = col_character())) %>%
    filter(state_from == "22" & type == "out") %>%
    # Group by FIPS code and year of filing
    # (eg. describing activities of prior year)

    group_by(fips = fips_from, year = year_to) %>%
    summarize(returns = sum(returns, na.rm = TRUE),
              indiv_exemptions = sum(indiv_exemptions, na.rm = TRUE),
              income = sum(income, na.rm = TRUE)) %>% 
    mutate(type = "out") %>% ungroup()) %>%
 
  # Pivot out longer into one tidy data.frame...
  pivot_longer(
    cols = -c(fips, year, type),
    names_to = "measure",
    values_to = "value") %>%
  # So that we can pivot it out wider into a nice matrix
  # where each row is a single Louisiana county
  pivot_wider(
    id_cols = fips,
    names_from = c(measure, type, year),
    values_from = value) %>%
  # Export to file
  write_csv("data/migration_total.csv")
```


These tables give us three very useful kinds of information
For each year, how many people moved, taking with them...
1. returns (number of households)
2. indiv_exemptions (number of people who moved)
3. income (total adjusted gross income in THOUSANDS of dollars, starting with filing year 1995-1996)

Importantly, they still provide SOMETHING for income in years 1992-1995, but it is unclear to me how it is different. There does not appear to be documentation. As a result, our analysis should just use data from 1996 - 2018.


Below, I outline our data analysis process, beginning with the import and construction of our dataset.

```{r, message = FALSE, warning=FALSE, echo = FALSE}
# Load libraries
library(tidyverse)
```

# 3. Data Wrangling

## 3.1 Parishes Dataset

Let's import and grab several key variables from this dataset.
```{r, message = FALSE, warning= FALSE, echo = FALSE}
# Import dataset
read_csv("data/laparishesdata.csv") %>%
    # grab and rename these variables
  select(parish = `Parish`,
         fips = `County Code`,
         disaster = `Disaster Zone`,
         hard = `Hard`,
         soft = `Soft`,
         state = `State`,
         local = `Local`,
         region = `LA Region`,
         damaged = `# Occupied Units Damaged`,
         damaged_percent = `Percent of Occupied Units Damaged (FEMA/HUD)`,
#         deceased = `Deceased Victims Recovered - LSU Hurricane Public Health Center (alternate source noted in comments)`,
         coastal = `Coastal`) %>%
  # Edit data
  mutate(
    # Convert percent to numeric
    damaged_percent = str_remove(damaged_percent, "%") %>% as.numeric,
    # convert FIPS to character
    fips = as.character(fips)) %>%
  # Fix NAs, which should be zeros
  mutate(damaged = if_else(is.na(damaged), 0, damaged),
         damaged_percent = if_else(is.na(damaged_percent), 0, damaged_percent),
         # Mark all parishes outside the disaster zone as not coastal
         coastal = if_else(is.na(coastal), "No", coastal) %>% tolower()) %>%
  # Export to file
  write_csv("data/raw_data.csv")
```


## 3.2 FEMA Disaster Declarations

Next, to account for OTHER disasters happening throughout our study period, we are going to also control for the number of disaster declarations in these counties over time using data from FEMA (https://www.fema.gov/openfema-data-page/disaster-declarations-summaries-v2).

It's not perfect, as it does not account for differential levels of damage, but it does a really good job of capturing the incidence of MAJOR disasters at the county level over time, the kind of thing we would expect would jeopardize long-term recovery after Hurricanes Katrina and Rita.

See the description from FEMA's website:

"FEMA Disaster Declarations Summary is a summarized dataset describing all federally declared disasters. This dataset lists all official FEMA Disaster Declarations, beginning with the first disaster declaration in 1953 and features all three disaster declaration types: major disaster, emergency, and fire management assistance. The dataset includes declared recovery programs and geographic areas (county not available before 1964; Fire Management records are considered partial due to historical nature of the dataset).

Please note the unique structure of the disaster sequencing (due to a numbering system that originated in the 1950's-1970's):

0001-1999 Major Disaster Declaration

2000-2999 Fire Management

3000-3999 Emergency Declaration (Special Emergency)

4000- Major Disaster Declaration

For more information on the disaster declaration process, see https://www.fema.gov/disasters and https://www.fema.gov/disasters/how-declared

This is raw, unedited data from FEMA's National Emergency Management Information System (NEMIS) and as such is subject to a small percentage of human error. The dataset is primarily composed of historical data that was manually entered into NEMIS after it launched in 1998. The financial information is derived from NEMIS and not FEMA's official financial systems."

```{r}
# Download from source
#download.file(url = "https://www.fema.gov/api/open/v2/DisasterDeclarationsSummaries.csv",
#              destfile = "data/fema.csv")
# Downloaded on February 21, 2021

#  Read in FEMA disaster declaration summaries
read_csv("data/fema.csv") %>%
  # Zoom into county-specific disaster designations
  filter(fipsCountyCode != "000") %>%
  filter(state == "LA") %>%
  select(year = fyDeclared, county = fipsCountyCode, 
         disaster_id = femaDeclarationString, name = declarationTitle) %>%
  mutate(county = paste("22", str_pad(county, width = 3, side = "left", pad = "0"), sep = "")) %>%
  # Zoom into distinct disasters per county-year
  distinct() %>%
  saveRDS("data/fema_disasters.rds")
```


## 3.3 National Historical GIS database

Next, let's merge in key variables from the National Historical GIS database. Many thanks to them for their excellent collection of time-series, standardized parish data. The great geographers over there have taken variables like **population** and adjusted values for previous time periods, when jurisidctional boundaries might have changed. See citation below.

Steven Manson, Jonathan Schroeder, David Van Riper, and Steven Ruggles. IPUMS National Historical Geographic Information System: Version 14.0 [Database]. Minneapolis, MN: IPUMS. 2019. http://doi.org/10.18128/D050.V14.0

First, we'll format their standardized data, which includes population and demographic data.

```{r, message = FALSE, warning= FALSE, message = FALSE, warning = FALSE, echo = FALSE}

read_csv("nhgis/nhgis_2010.csv") %>% 
  # Select and rename
  select(state = STATEA,
         county = COUNTYA,
         year = DATAYEAR,
         pop = CL8AA, # Total Population
         
         # Geography
         pop_urban = CL9AA, # Persons: Urban
         pop_urban_inside = CL9AB, # Persons: Urban--Inside urbanized areas
         pop_urban_outside = CL9AC, # Persons: Urban--Outside urbanized areas (in urban clusters)
         pop_rural = CL9AD, # Persons: Rural
         
         # Gender
         pop_men = CM0AA, # Persons: Male
         pop_women = CM0AB, # Persons: Female
         
         # Age
         pop_age_under_5 = CW3AA, # Persons: Under 5 years
         pop_age_65_69 = CW3AR, # Persons: 65 to 69 years
         pop_age_70_74 = CW3AS, # Persons: 70 to 74 years
         pop_age_75_79 = CW3AT, # Persons: 75 to 79 years
         pop_age_80_84 = CW3AU, # Persons: 80 to 84 years
         pop_age_85_plus = CW3AV, # Persons: 85 years and over
         
         # Race data
         pop_white = CM1AA, # Persons: White (single race)
         pop_black = CM1AB, # Persons: Black or African American (single race)
         pop_native = CM1AC, # Persons: American Indian and Alaska Native (single race)
         pop_asian = CM1AD, # Persons: Asian (single race)
         pop_pacific = CM1AE, # Persons: Native Hawaiian and Other Pacific Islander (single race)
         pop_other_race = CM1AF, # Persons: Some Other Race (single race)
         pop_race_two_or_more = CM1AG, # Persons: Two or More Races
        
         # Housing Data
         housing = CM7AA, # Housing units: Total
         housing_occupied = CM9AA, # Housing units: Occupied
         housing_vacant = CM9AB # Housing units: Vacant
  ) %>%
  # Now filter to just counties after 1990
  filter(year >= 1990) %>% # & state == "22") %>%
  # Get a single FIPS code for each county
  mutate(fips = paste(state, county, sep = "")) %>%
  # For each parish-year,
  mutate(
    # Calculate total population of age 65 or over
    pop_age_65_plus = pop_age_65_69 + pop_age_70_74 + 
      pop_age_75_79 + pop_age_80_84 + pop_age_85_plus,
    # Calculate total population under 5 or over 65,
    # also known as the dependent population (less mobile during crisis)
    pop_age_under_5_65_plus = pop_age_under_5 + pop_age_65_plus,
    # Calculate non-white population
    pop_non_white = pop - pop_white,
    # Calculate population that is neither white nor black
    pop_non_white_black = pop - pop_white - pop_black) %>%
  
  # Turn each of these into percentages of population
  mutate_at(vars("pop_urban", "pop_urban_inside", 
                 "pop_urban_outside", "pop_rural",
                 "pop_men", "pop_women",
                 "pop_age_65_plus", "pop_age_under_5_65_plus",
                 "pop_white", "pop_black", "pop_native", 
                 "pop_asian", "pop_pacific", "pop_other_race", 
                 "pop_race_two_or_more", 
                 "pop_non_white", "pop_non_white_black"),
  funs(. / pop * 100)) %>%
  mutate(
    # Get housing per capita
    housing_per_capita = housing / pop,
    # Get percentage of houses that are occupied vs. vacant
    housing_occupied = housing_occupied / housing * 100,
    housing_vacant = housing_vacant / housing * 100) %>%
  # Select any variables to exclude
  select(-c(pop_age_65_69, pop_age_70_74, pop_age_75_79, 
            pop_age_80_84, pop_age_85_plus, housing, state, county)) %>%
  # Pivot to tidy data, so we can then...
  pivot_longer(
    cols = -c(fips, year),
    names_to = "measure",
    values_to = "value") %>%
  # Pivot into wider, final data.frame
  pivot_wider(
    id_cols = fips,
    names_from = c(measure, year),
    values_from = value) %>%
  # Export
  write_csv("data/nhgis_standardized.csv")
```


Second, we'll format their nominal, unstandardized data, like income, which we definitely still need even if not standardized.

```{r, message = FALSE, warning= FALSE, message = FALSE, warning = FALSE, echo = FALSE}
read_csv("nhgis/nhgis_nominal.csv") %>%
  # Grab and rename variables
  select(
    year = YEAR,
    county = COUNTYFP,
    state = STATEFP,
#    median_age = AR9AA, # Median age: Persons
    pop_hisp_lat = A35AA, #  Persons: Hispanic or Latino
    pop_edu_high_school = BW7AC, #  Persons: 25 years and over ~ 12th grade or high school graduate, GED, or alternative, and less than 1 year of college
    pop_edu_1_3_college = BW7AD, #  Persons: 25 years and over ~ 1 to 3 years of college (until 1980) or 1 or more years of college with no degree or with associate's degree (since 1990)
    pop_edu_college = BW7AE, #  Persons: 25 years and over ~ 4 or more years of college (until 1980) or bachelor's degree or higher (since 1990)
    pop_labor_force_civ = B84AC, #   Persons: 16 years and over ~ In labor force--Civilian
    pop_labor_force_civ_unemployed = B84AE, #   Persons: 16 years and over ~ In labor force--Civilian--Unemployed
    income_median_household = B79AA, # Median income in previous year: Households
    income_median_family = AB2AA, # Median income in previous year: Families
    income_per_capita = BD5AA, # Per capita income in previous year
    pop_poverty = CL6AA # Persons: Poverty status is determined ~ Income below poverty level
  ) %>%
  # Filter to parishes in or after 1990
  filter(year >= 1990) %>% # & state == "22") %>%
  mutate(
    # Unemployed persons per 1000 in the civilian labor force
    unemployment = pop_labor_force_civ_unemployed / pop_labor_force_civ * 1000,
    # Make a single fips code out of state and county codes
    fips = paste(state, county, sep = "")) %>%
  # Pivot into tidy format
  select(-c(state, county)) %>%
  pivot_longer(
    cols = -c(fips, year),
    names_to = "measure",
    values_to = "value") %>%
  # Pivot into wider format for easy joining
  pivot_wider(
    id_cols = fips,
    names_from = c(measure, year),
    values_from = value) %>%
  # Export
  write_csv("data/nhgis_nominal.csv")
```




Now, let's merge these two datasets from NHGIS together.

```{r, message = FALSE, warning= FALSE, message = FALSE, warning = FALSE, echo = FALSE}
dat = read_csv("data/raw_data.csv", 
               col_types = cols(
                 fips = col_character())) %>%
  select(fips) %>%
  # Import standardized NHGIS variables
  left_join(
    by = "fips",
    y = read_csv("data/nhgis_standardized.csv", 
                 col_types = cols(
                   fips = col_character()))) %>%
  # Import non-standardized NHGIS variables
  left_join(
    by = "fips",
    y = read_csv("data/nhgis_nominal.csv", 
                 col_types = cols(
                   fips = col_character())))



# Identify the names of variables that are missing more that 5% of their data
cut = dat %>%
  # Pivot into tidy format
  pivot_longer(
    cols = -c(fips),
    names_to = "measure",
    values_to = "value") %>%
  # Measure what percentage of data points are missing per variable
  group_by(measure) %>%
  summarize(missing = sum(if_else(is.na(value), 1, 0)) / n() ) %>%
  # Filter to those missing more than 5%
  filter(missing > 0.05) %>%
  # convert variable names to vector
  select(measure) %>% unlist()


# Keep only variables with little missing data
dat = dat %>%
  select(-cut) %>%
  # Final data transformations
  mutate(
    # Convert these measures to percentages of the population
    pop_hisp_lat_1990 = pop_hisp_lat_1990 / pop_1990 * 100,
    pop_hisp_lat_2000 = pop_hisp_lat_2000 / pop_2000 * 100,
    pop_hisp_lat_2010 = pop_hisp_lat_2010 / pop_2010 * 100,
    pop_edu_1_3_college_2000 = pop_edu_1_3_college_2000 / pop_2000 * 100,
    pop_edu_college_2000 = pop_edu_college_2000 / pop_2000 * 100,
    pop_poverty_1990 = pop_poverty_1990 / pop_1990 * 100,
    pop_poverty_2000 = pop_poverty_2000 / pop_2000 * 100) %>%
  # Export to file
  write_csv("nhgis_together.csv")


```

Let's also export some age-related data to help create our social capital variables.

```{r, message = FALSE, warning= FALSE}
# Calculate the voting-age eligible population for each county-decade
read_csv("nhgis/nhgis_2010.csv") %>% 
  select(state = STATEA,
         county = COUNTYA,
         year = DATAYEAR, 
         pop = CL8AA,
          # Age
        pop_age_under_5 = CW3AA, # Persons: Under 5 years
        pop_age_5_9 = CW3AB, # Persons: 5 to 9 years
        pop_age_10_14 = CW3AC, # Persons: 10 to 14 years
        pop_age_15_17 = CW3AD) %>% # Persons: 15 to 17 years
         
# Now filter to just counties after 1990
  filter(year >= 1990) %>% # & state == "22") %>%
  # Get a single FIPS code for each county
  mutate(fips = paste(state, county, sep = "")) %>%
  # For each parish-year,
  mutate(
    # Calculate total population over age 18,
    # Meaning they are voting-age eligible
    pop_age_eligible = pop - pop_age_under_5 - pop_age_5_9 - 
      pop_age_10_14 - pop_age_15_17) %>% 
  # Export
  write_csv("data/nhgis_age_eligible.csv")
```


## 2.4 1990s dataset




Using this, we can create a single dataset with time-invariant variables from 1990 and time-variant response variables describing recovery.

```{r, message = FALSE, warning= FALSE, echo = FALSE, message= FALSE, warning = FALSE}
# Grab the FIPS codes for our variables under study
read_csv("data/raw_data.csv", 
         col_types = cols(fips = col_character())) %>%
  select(fips) %>%
  # Import migration data for only those FIPS codes
  left_join(
    by = "fips",
    y = read_csv("data/migration_total.csv",
                 col_types = cols(
                   fips = col_character()))) %>%
  # Pivot into a tidy data.frame
  pivot_longer(
    cols = -fips,
    names_to = c("measure", "year"),
    names_sep = c(-5),
    values_to = "value") %>%
  mutate(year = year %>% str_remove("_")) %>%
  # Pivot out into panel data
  pivot_wider(
    id_cols = c(fips, year),
    names_from = measure,
    values_from = value) %>%
  # Now, in order to join decennial census data in,
  # Let's create a variable indicating which decade each year corresponds to.
  mutate(decennial = if_else(
    # If below 2000, give 1990
    year < 2000, "1990", if_else(
      # If below 2010, give 2000,
      year < 2010, "2000", 
      # If not below 2000 or 2010, give 2010 
      "2010"))) %>%
  
  
  
  
  
  # NEXT,
  # Let's reformat the NHGIS data into a dataset of parish-years
  # and join it into our data via decennial variables
  
  left_join(
    by = c("fips" = "fips"), #"decennial" = "year"),
    y = read_csv("data/nhgis_together.csv", 
                 col_types = cols(fips = col_character())) %>%
      # Pivot into parish-years tidy format
      pivot_longer(
        cols = -c(fips),
        names_to = c("measure", "year"),
        names_sep = c(-5),
        values_to = "value") %>%
      # fix year
      mutate(year = year %>% str_remove("_")) %>%
############################
### MAJOR CHANGE ####
############################
      # Filter to just data from the year 1990
      filter(year == "1990") %>%
      
        
      # Pivot out into parish-years panel format
      pivot_wider(
        id_cols = c(fips, year),
        names_from = "measure",
        values_from = "value") %>%
  
     select(-year)

  ) %>%
  # Mark which years are during/after the disaster
  mutate(disaster = if_else(year %>% as.numeric() >= 2005, "yes", "no")) %>%
  
  
  ## FINALLY,
  # Let's merge in our policy toolkit and disaster data
  left_join(
    by = c("fips" = "fips", "disaster" = "disaster"),
    
    # Let's merge in the disaster data for the year 2005 and afterwards
    y = read_csv("data/raw_data.csv", col_types = cols(fips = col_character())) %>%
      filter(disaster == "yes") %>% select(-parish)) %>%
  
  # Now, we need to edit the disaster variable, because currently it indicates
  # whether the case occurred before or after the disaster.
  # We need to it mean that they EXPERIENCED the disaster.
  # Fortunately, at this point, 
  # any case that didn't experience the disaster has its hard/soft policies marked as NA.
  # So, we can recode any case where their hard or soft policy is NA as "no",
  # the disaster didn't affect this place.
  mutate(disaster = if_else(is.na(hard), "no", disaster)) %>%
  
  # Several of these disaster variables will be NA for all cases.

  # Convert disaster numeric covariate data to 0 if NA
  mutate_at(
    vars("damaged", "damaged_percent"),
    funs(if_else(is.na(.), 0, .))) %>%
  # Convert all treatment variables to 1 if yes and 0 if no or NA
  mutate_at(
    vars("disaster", "hard", "soft", "state", "local", "coastal"),
    funs(
      # First, replace any NA with no
      if_else(is.na(.), "no", .) %>%
      # Second, recode from yes/no to 1/0
      recode(
        "yes" = 1,
        "no" = 0))) %>%
  # Create a unique identifier for each year
  mutate(id = paste(year, fips, sep = "") %>% as.numeric) %>%
  # Filter to 1996 (since numbers change greatly between 1995-1996)
  filter(year > 1995) %>%
  # Turn into percapita measures
    mutate(indiv_exemptions_in = indiv_exemptions_in / pop,
         indiv_exemptions_out = indiv_exemptions_out / pop,
         income_in = income_in / pop,
         income_out = income_out / pop) %>%
  # Export to file  
  write_csv("data/dataset_1990.csv")

```


## 3.4 Time Variant Dataset

We can *also* create a dataset with time-variant covariates and time-variant response variables describing recovery. In this dataset, the response variables vary annually, while the covariates vary by decade.

```{r, message = FALSE, warning= FALSE, echo = FALSE, message= FALSE, warning = FALSE}
# Repeat, with just one difference:
# Assign each decennial census's data to the corresponding year

library(tidyverse)

# Get fips codes of Louisiana parishes of interest
read_csv("data/raw_data.csv", 
         col_types = cols(fips = col_character())) %>%
  select(fips) %>%
  # Import Migration Data
  left_join(
    by = "fips",
    y = read_csv("data/migration_total.csv",
                 col_types = cols(
                   fips = col_character()))) %>%
  # Pivot to long-form
  pivot_longer(cols = -fips, names_to = c("measure", "year"), names_sep = c(-5), values_to = "value") %>%
  mutate(year = year %>% str_remove("_")) %>%
  # Pivot wider again
  pivot_wider(id_cols = c(fips, year), names_from = measure, values_from = value) %>%
  # Filter to 1996 (since numbers change greatly between 1995-1996)
  filter(year >= 1996) %>%
  # Create a decennial field
  mutate(decennial = if_else(year <= 1999, "1990", if_else(year <= 2009, "2000", "2010"))) %>%
  # export part A
  write_csv("data/time_variant_a.csv")


```


```{r, message = FALSE, warning= FALSE}
# Create demographics

read_csv("data/nhgis_together.csv") %>%
              pivot_longer(
                cols = -fips,
                names_to = c("measure", "year"),
                names_sep = -5,
                values_to = "value") %>%
              mutate(year = year %>% str_remove("_"),
                     fips = fips %>% str_pad(width = 5, side = "left", pad = "0")) %>%
              pivot_wider(
                id_cols = c(fips, year),
                names_from = measure,
                values_from = value) %>%
              
              # Note: Civilian Labor Force is missing in 2010
              # Note: Population in Poverty is missing in 2010
              # Note: Educated population is missing in 2010
              
              # These variables are not available for 2010
              select(-c(pop_labor_force_civ, pop_labor_force_civ_unemployed, 
                        pop_poverty, unemployment, pop_edu_high_school, 
                        pop_edu_1_3_college, pop_edu_college)) %>%
  write_csv("data/demographics.csv")

# Create socioeconomics
read_csv("data/nhgis_together.csv") %>%
              pivot_longer( cols = -fips, names_to = c("measure", "year"), names_sep = -5, values_to = "value") %>%
              mutate(year = year %>% str_remove("_"),
                     fips = fips %>% str_pad(width = 5, side = "left", pad = "0")) %>%
              pivot_wider(id_cols = c(fips, year), names_from = measure, values_from = value) %>%
              filter(year %in% c("1990", "2000")) %>%
              # Note: Civilian Labor Force is missing in 2010
              # Note: Population in Poverty is missing in 2010
              # Note: Educated population is missing in 2010
              
              # These variables are not available for 2010
              select(c(fips, year, pop_labor_force_civ, pop_labor_force_civ_unemployed, 
                        pop_poverty, unemployment)) %>%
  write_csv("data/socioeconomics.csv")

# Create education
read_csv("data/nhgis_together.csv") %>%
              pivot_longer(cols = -fips, names_to = c("measure", "year"), names_sep = -5, values_to = "value") %>%
              mutate(year = year %>% str_remove("_"),
                     fips = fips %>% str_pad(width = 5, side = "left", pad = "0")) %>%
              pivot_wider(id_cols = c(fips, year), names_from = measure, values_from = value) %>% 
              filter(year %in% c("2000")) %>%
              select(c(fips, year,
                pop_edu_high_school, pop_edu_1_3_college, pop_edu_college)) %>%
  write_csv("data/education.csv")
```












```{r, message = FALSE, warning= FALSE}
# I want to join only these variables together

read_csv("data/time_variant_a.csv") %>%
  mutate_at(vars(fips, year, decennial), as.character) %>%
  # Join in demographic variables
  left_join(by = c("fips", "decennial" = "year"),
            y =  read_csv("data/demographics.csv") %>%
              mutate_at(vars(fips, year), as.character)) %>%
  # Recode the decennial variable so that years in the 2010 decade will receive variables from 2000
  mutate(decennial = decennial %>% 
           recode(
             "2010" = "2000",
             "2000" = "2000",
             "1990" = "1990")) %>%
  # Join in socioeconomic variables
  left_join(by = c("fips", "decennial" = "year"),
            y = read_csv("data/socioeconomics.csv") %>%
              mutate_at(vars(fips, year), as.character)) %>%
  # Recode the decennial variable so that the decades 1990 and 2010 all receive variables from 2000
  mutate(decennial = decennial %>% 
           recode(
             "2010" = "2000",
             "2000" = "2000",
             "1990" = "2000")) %>%
  # Join in education variables
  left_join(by = c("fips", "decennial" = "year"),
            y = read_csv("data/education.csv") %>%
              mutate_at(vars(fips, year), as.character)) %>%
  # Export to file
  write_csv("data/time_variant_b.csv")
```



```{r, message = FALSE, warning= FALSE}

read_csv("data/time_variant_b.csv") %>%
  mutate_at(vars(fips, year, decennial), as.character) %>%
  # In order to merge in the right years, create a temporary year
  # specifying which years of crime data we want to append to
  mutate(year_merge = year %>% recode(
    "1996" = "1994", "1997" = "1997", "1998" = "1998", "1999" = "1999",
    "2000" = "2000", "2001" = "2001", "2002" = "2002", "2003" = "2002",
    "2004" = "2002", "2005" = "2002", "2006" = "2002", "2007" = "2002",
    "2008" = "2002", "2009" = "2009", "2010" = "2010", "2011" = "2011",
    "2012" = "2012", "2013" = "2013", "2014" = "2014", "2015" = "2014",
    "2016" = "2016", "2017" = "2016", "2018" = "2016", "2019" = "2016",
    "2020" = "2016")) %>%
  # Now merge in crime data
  left_join(by = c("fips", "year_merge" = "year"),
            
            y = read_csv("data/social_capital_proxies.csv") %>%
              # pivot it longer
              pivot_longer(
                cols = -fips,
                names_to = c("measure", "year"),
                names_sep = -5,
                values_to = "values") %>% 
              # fix year
              mutate(year = year %>% str_sub(2,5)) %>%
              
              # Filter to just crime rates
              filter(measure == "crime_rate") %>%
              
              # Then pivot it wider to merge in county-year values for each variable
              pivot_wider(
                id_cols = c(fips, year),
                names_from = measure,
                values_from = values)) %>%
  write_csv("data/time_variant_c.csv")
```

```{r, message = FALSE, warning = FALSE}
read_csv("data/time_variant_c.csv") %>%
  mutate_at(vars(fips,year,year_merge), as.character) %>%
  # Recode years to merge in voter turnout and winning party data
  mutate(year_merge = if_else(
    year <= 2003, "2000", if_else(
      year >= 2004 & year <= 2007, "2004", if_else(
        year >= 2008 & year <= 2011, "2008", if_else(
          year >= 2012 & year <= 2015, "2012", "2016"))))) %>% 

# Now merge in voting data
  left_join(by = c("fips", "year_merge" = "year"),
            
            y = read_csv("data/social_capital_proxies.csv") %>%
            # pivot it longer
            pivot_longer(
              cols = -fips,
              names_to = c("measure", "year"),
              names_sep = -5,
              values_to = "values") %>% 
              # fix year
              mutate(year = year %>% str_sub(2,5)) %>%
              
              # Filter to just voting data
              filter(measure %in% c("voter_turnout", "winning_party")) %>%

              # Then pivot it wider to merge in county-year values for each variable
              pivot_wider(
                id_cols = c(fips, year),
                names_from = measure,
                values_from = values)) %>%
  write_csv("data/time_variant_d.csv")

```

```{r, message=FALSE, warning=FALSE}

# No variable is missing more than 5% of data per year
# However, crime rates is missing nearly 25% of data points, 
# because some points were missing population for the rates.

# Use the following code commented out to see these variables.
#read_csv("time_variant_d.csv") %>%
#  select(-year_merge, -decennial) %>%
#  pivot_longer(
#    cols = -c(fips, year),
#    names_to = "measure",
#    values_to = "value") %>%
#  group_by(year, measure) %>%
#  summarize(
#    missing = sum(if_else(is.na(value), 1, 0)) / n() * 100)

# So let's impute the mean of each variable
#read_csv("data/time_variant_d.csv") %>%
#  mutate_at(vars(fips, year), as.character) %>%
#  select(-year_merge, -decennial) %>%
  # pivot longer
 # pivot_longer(
#    cols = -c(fips, year),
#    names_to = "measure",
#    values_to = "value") %>%
  # Impute mean
  #group_by(measure, year) %>%
  #mutate(value = if_else(
    # If the value is missing
  #  is.na(value) &
    # and If the missing data point is NOT an outcome,
  #  !measure %in% c("indiv_exemptions_in", "indiv_exemptions_out",
  #                  "income_in", "income_out"),
    # Then replace it with the mean of its variable from that year.
    # Otherwise, replace it with the original value.
  #  mean(value, na.rm = TRUE), value)) %>% 
  
  # pivot out wider into final format
 # pivot_wider(
#    id_cols = c(fips, year),
#    names_from = measure,
#    values_from = value) %>%
  # Export
#  write_csv("data/time_variant_e.csv")

```

```{r, message = FALSE, warning= FALSE}
# Import covariates over time
read_csv("data/time_variant_d.csv") %>%
  # Change unique ids and years to characters
  mutate_at(vars(fips,year), as.character) %>%
  select(-year_merge, -decennial) %>%
  # Create a post-disaster indicator
  mutate(disaster = if_else(year %>% as.numeric() >= 2005, "yes", "no")) %>%
  # Join in an indicator of the disaster
  left_join(
    by = c("fips" = "fips", "disaster" = "disaster"),
    y = read_csv("data/raw_data.csv", col_types = cols(fips = col_character())) %>%
      filter(disaster == "yes") %>% select(-parish)) %>%
  mutate(disaster = if_else(is.na(hard), "no", disaster)) %>%
  # Fix some damage level questions
  mutate_at(
    vars("damaged", "damaged_percent"),
    list(~if_else(is.na(.), 0, .))) %>%
  # Adapt these measures
  mutate_at(
    vars("disaster", "hard", "soft", "state", "local", "coastal"),
    list(~if_else(is.na(.), "no", .) %>% recode("yes" = 1, "no" = 0))) %>%
  # Join in the total number of disaster declarations per county per year
  left_join(by = c("fips" = "county", "year"),
            y = read_rds("data/fema_disasters.rds") %>% 
              group_by(year = as.character(year), county) %>%
              summarize(declarations = n()) %>%
              ungroup()) %>%
  # Replace NA with 0, because in this case, NA means zero declarations
  mutate(declarations = if_else(is.na(declarations), 0, as.numeric(declarations))) %>%
  # Make a unique ID
  mutate(id = paste(year, fips, sep = "") %>% as.numeric) %>%
  # Calculate net migration out of the parish per 1000 persons
  mutate(
    net_migration_in = (indiv_exemptions_in - indiv_exemptions_out) / pop * 1000,
    net_income_in = (income_in - income_out) / pop * 100, # fits better with log-scale
    net_returns_in = (returns_in - returns_out) / pop * 1000
  ) %>%
  # Export
  
  # Import these as numeric
  mutate_at(vars(year, disaster, soft, hard, state, local), as.double) %>% 
  # Make sure parish fips code is a unique 5 character code
  mutate(fips = fips %>% str_pad(width = 5, side = "left", pad = "0")) %>%
  # Make sure format is a data.frame, not a tibble
  as.data.frame() %>%
  # Add together the percentages of the population with some college education 
  # and a complete college education to get the true population with some college
  mutate(pop_edu_some_college = pop_edu_college + pop_edu_1_3_college) %>%
  
  mutate(
    # Identify those cases which adopted a soft or hard recovery policy
    softhard = soft * hard,
    # Identify parishes which adopted a hard state policy
    hardstate = hard * state,
    # Identify parishes which adopted a soft state policy
    softstate = soft * state,
    # Identify parishes which adopted a hard local policy
    hardlocal = hard * local,
    # Identify parishes which adopted a soft local policy
    softlocal = soft * local,
    # Identify parishes which adopted hard, soft, and local policy
    hardsoftlocal = hard* soft * local,
    # Identify parishes which adopted hard, soft, and state policy,
    hardsoftstate = hard* soft * state,
    # Identify parishes which adopted a soft, state, and local policy
    softstatelocal = soft * state * local,
    # Identify parishes which adopted hard, state, and local policy
    hardstatelocal = hard* state * local,
    # Identify parishes which adopted either a hard or soft policy
    either = if_else(soft == 1 | hard == 1, 1, 0)) %>%
  # Filter to years 1996-2018.
  filter(year %in% 1996:2018) %>%
  # 22107  (Tensas Parish) | 22035 (East Carroll)
  # Two counties are missing a total of 7-county years (1 for 22035, 6 for 22107, all post 2014.)
  # And 22031 (DeSoto Parish) experienced a change in income of 861,000 per capita in 2012, 
  # nearly 8 times that of any other parish (usual values are 1,000-10,000). This looks to be a data-error. 
  # Just to be safe, I am setting DeSoto's value in 2012 to be NA, to be imputed, so that it does not skew results.
  # Its other outcomes look fine though.
  mutate(net_income_in = if_else(year == 2012 & fips == 22031, NA_real_, net_income_in)) %>%
  # Let's also join in the actual names of each county
  left_join(by = c("fips" = "geoid"),
            y = read_rds("shapes/counties.rds") %>%
              as.data.frame() %>%
              select(geoid, name) %>%
              distinct()) %>%
  # Finally, let's join in a revised version of disaster damage
  # which includes more counties than before
  left_join(by = c("fips" = "geoid", "name"),
            y = read_csv("data/disaster_damage.csv") %>%
              mutate(geoid = as.character(geoid))) %>%
  select(-damaged_percent) %>%
  rename(damaged_percent = occupied_units_with_damage_pc,
         damaged_severe_percent = occupied_units_with_severe_damage_pc) %>%
  # Make sure our damage variable only has this value for years 2005 and later
  mutate_at(vars(damaged_percent, damaged_severe_percent),
            list(~case_when(is.na(.) ~ 0,  !is.na(.) & year >= 2005 ~ ., TRUE ~ 0))) %>%
#US Department of Housing and Urban Development [HUD]. (2006, February 12). Current Housing Unit Damage Estimates: Hurricanes Katrina, Rita, and Wilma. Office of Policy Development and Research. Accessed February 22, 2021. https://www.huduser.gov/publications/pdf/gulfcoast_hsngdmgest.pdf
  # Finally, Acadia was not one of the 20 parishes at the conference...
  # We're going to constrain our analysis to just those,
  # to evaluate the treatment effects for these relatively similar coastal counties
  # which were all invited to develop recovery policies at this conference
  mutate(disaster = if_else(name == "Acadia", 0, disaster)) %>%
  mutate_at(vars(local, hard, soft, state, either,
                 softhard,hardstate,softstate, hardlocal, softlocal,
                 hardsoftlocal, hardsoftstate, softstatelocal, hardstatelocal),
            funs(if_else(name == "Acadia", 0, .))) %>%
  # Finally, make it easier to model with population
  mutate(pop = pop / 1000) %>%
  arrange(fips, year) %>%  
  write_csv("dataset_1996_2018.csv")
```

```{r}
remove(cut, not, extract, dat)
```







# 4. Data

Next, we import our panel dataset. 

This dataset has several important traits. First, it includes entries for each of 64 Louisiana parishes between 1996 and 2018, totaling 1472 entries. Each parish-year contains four recovery outcomes, estimated using migration data from the Internal Revenue Service, which tracks how many people (and how much money) enters or leaves every US county during each tax filing year.

These recovery outcomes include the number of individual exemptions made on tax forms (roughly equivalent to the number of persons who moved) which *moved away from* the parish for any parish in the US, vs. the number of individual exemptions made which *moved to* the parish in the preceding tax year. They also include the number of dollars per capita which *moved away from* the parish with those migrants, or the dollars per capita which *moved to* the parish in that year. This gives us *human* and *financial* measures of annual recovery.

Why should we interpret the movement of people and money as recovery? When parishes lose people or money, this is a form of residents voting with their feet, chosing to relocate elsewhere rather than continuing living in their disaster struck community. However, parishes which see more people moving to them tend to be recovering better. As a result, we can read more in-migration as positive recovery and more out-migration as negative recovery.

```{r, message = FALSE, warning = FALSE}
dat <- read_csv("dataset_1996_2018.csv") 
dat %>%
  group_by(disaster) %>%
  count()

# Which ethnic groups should we include?
# African American, White, and Latino groups are largest.
# All others make up very small portions of the population

dat %>%
  group_by(year) %>%
  summarize(
    white = mean(pop_white),
    black = mean(pop_black),
    asian = mean(pop_asian),
    latino = mean(pop_hisp_lat),
    native = mean(pop_native))

# Given a limited number of terms we can include, these three seem most critical.
# Black, White, Latino
# Additionally, we cannot include education.
# pop_edu_some_college + (education is unit invariant; can't include)


```



# 5. Descriptive Analysis

In our data, up to 21 parishes were affected by the treatments. There were five basic treatments parishes could have received.

1. Parish is hit by Hurricane Katrina

2. Parish is hit by Hurricane Katrina and chooses a hard recovery policy toolkit

3. Parish is hit by Hurricane Katrina and chooses a soft recovery policy toolkit

4. Parish is hit by Hurricane Katrina, and state handles most recovery

5. Parish is hit by Hurricane Katrina, and local authorities handle most recovery

The remaining 43 Louisiana Parishes which were not hit directly by Hurricane Katrina in 2005 are potential comparison units from which the synthetic control can be constructed. We gathered data on these parish-years from the National Historical GIS center, which consolidates data from the 1990, 2000, and 2010 censuses, as well as annual recovery outcome data from the Internal Revenue Service's county migration datasets.

Did one of the treatments above have a statistically significant effect on recovery outcomes in parishes where it was introduced, compared to if it had not been?

We can see already some descriptive evidence of variation in recovery outcomes among parishes which adopted different recovery outcomes. However, each of these parishes is different, and may not be ideal cases to compare.

## 5.0 Icons

```{r}
# Get font awesome images

#install.packages(c("magick", "fontawesome", "rsvg"))
library(fontawesome)
library(magick)
library(rsvg)
#dir.create("fa")

#<img src="img_girl.jpg" alt="Girl in a jacket">
fa_png(name = 'fas fa-tools', 
       file = "fa/tools.png", 
       fill = '#648FFF',
       width = 100, height = 100)

fa_png(name = 'fas fa-people-arrows', 
       file = "fa/people-arrows.png",
       fill = '#DC267F', 
       width = 100, height = 100)

fa_png(name = 'fas fa-house-user', 
       file = "fa/house-user.png",
       fill = '#785EF0', 
       width = 100, height = 100)

fa_png(name = 'fas fa-flag', 
       file = "fa/flag.png",
       fill = '#FFB000', 
       width = 100, height = 100)


# Reimport as simple grey

#<img src="img_girl.jpg" alt="Girl in a jacket">
fa_png(name = 'fas fa-tools', 
       file = "fa/tools_grey.png", 
       fill = 'lightgrey',
       width = 100, height = 100)

fa_png(name = 'fas fa-people-arrows', 
       file = "fa/people-arrows_grey.png",
       fill = 'lightgrey', 
       width = 100, height = 100)

fa_png(name = 'fas fa-house-user', 
       file = "fa/house-user_grey.png",
       fill = 'lightgrey', 
       width = 100, height = 100)

fa_png(name = 'fas fa-flag', 
       file = "fa/flag_grey.png",
       fill = 'lightgrey', 
       width = 100, height = 100)


fa_png(name = 'fas fa-plus-circle', 
       file = "fa/plus-circle.png",
       fill = 'white', 
       width = 100, height = 100)

fa_png(name = 'fas fa-minus-circle', 
       file = "fa/minus-circle.png",
       fill = 'white', 
       width = 100, height = 100)




fa_png(name = 'fas fa-square', 
       file = "fa/square.png",
       fill = 'black', 
       width = 100, height = 100)
fa_png(name = 'fas fa-circle', 
       file = "fa/circle.png",
       fill = 'black', 
       width = 100, height = 100)
fa_png(name = 'fas fa-caret-up', 
       file = "fa/triangle.png",
       fill = 'black', 
       width = 100, height = 200)
image_read("fa/triangle.png") %>%
  image_trim() %>%
  image_write(path = "fa/triangle.png", format = "png", quality = 100)


fa_png(name = "fas fa-check-circle",
       file = "fa/check.png",
       fill = "black",
       fill_opacity = 0.75,
       width = 100, height = 100)

```

## 5.1. Map

```{r}
# Let's import several projections
library(tidyverse)
library(sf)
library(rgdal)
library(sfnetworks)

dir.create("shapes")

# WGS coordinate reference system
wgs <- "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
# North America Equal Area Conic
aea <- "+proj=aea +lat_1=20 +lat_2=60 +lat_0=40 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs"
# North American Equidistant Conic
aed <- "+proj=eqdc +lat_0=0 +lon_0=0 +lat_1=20 +lat_2=60 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs"

# Get county boundaries for every county in the US
tigris::counties(cb = TRUE, year = 2010) %>%
  st_as_sf() %>%
  st_transform(crs = wgs) %>%
  select(geoid = GEO_ID, name = NAME, area_land = CENSUSAREA, geometry) %>%
  mutate(geoid = str_sub(geoid, -5,-1)) %>%
  saveRDS("shapes/counties.rds")

# Get county centroid points!
read_rds("shapes/counties.rds") %>%
  # Transform to equal area conic
  st_transform(crs = aea) %>%
  # Get centroid for each geoid-name pair
  group_by(geoid, name) %>%
  summarize(geometry = st_centroid(geometry, of_largest_polygon = TRUE)) %>%
  ungroup() %>%
  # Transform back to WGS
  st_transform(crs = wgs) %>%
  saveRDS("shapes/counties_centroids.rds")



counties <- read_rds("shapes/counties.rds") %>%
  filter(str_sub(geoid, 1,2) == "22") %>%
  # Join in damage levels
  left_join(by = c("geoid" = "fips"),
            y = read_csv("dataset_1996_2018.csv") %>%
              filter(year >= 2006) %>%
              select(fips, damaged_percent, disaster) %>%
              distinct() %>%
              mutate(fips = str_pad(fips, width = 5, side = "left", pad = "0"))) %>%
  mutate(disaster = if_else(disaster == 1, "Treated (n = 20)", "Control Group (n = 41)"))


keyplaces <- read_rds("shapes/counties.rds") %>%
  filter(str_sub(geoid, 1,2) == "22") %>%
  filter(name %in% c("St. Bernard", "Washington", "Plaquemines")) %>%
  mutate(case = name %>% recode_factor(
    "Washington" = "Soft & Local",
    "Plaquemines" = "Hard",
    "St. Bernard" = "Hard, Soft, & Local"))


mycolors <- c("#648FFF", "#785EF0","#DC267F","#FE6100","#FFB000")

# Get labels by point, to use shadow text
mypoints <- counties %>%
  filter(disaster == "Treated (n = 20)")  %>%
  group_by(geoid) %>%
  summarize(name = unique(name),
            geometry = st_centroid(geometry),
            x = st_coordinates(geometry)[,1],
            y = st_coordinates(geometry)[,2]) %>%
  as_tibble()  %>%
  mutate(name = str_replace_all(name, "Jefferson Davis", "Jefferson\nDavis"))

g1 <- ggplot() +
  geom_sf(data = counties, fill = NA, color = "darkgrey", size = 5) +
  # Indicate counties under study via damage rates
  geom_sf(data = counties, mapping = aes(fill = damaged_percent), 
          color = "grey", size = 0.5) +
  scale_fill_gradient(
    low = "white", high = mycolors[2],
    breaks = c(0, 25, 50, 75, 90),
    labels = c("0 (Min)", "25", "50", "75", "90 (Max)"),
    limits = c(0, 90.2),
    guide = guide_colorbar(order = 2, frame.colour = "black", show.limits = TRUE,
                           frame.linewidth = 1.5, 
                           ticks.colour = "black",
                           ticks.linewidth = 2)) +
  # Overlay on our 20 parishes of interest
  geom_sf(data = counties %>%
            filter(disaster == "Treated (n = 20)"), 
          mapping = aes(color = factor(disaster)), 
          fill = NA,size = 0.5) +
  scale_color_manual(values = "black", guide = guide_legend(order = 1)) +
  # Add names
  #geom_sf_label(data = counties %>%
  #               filter(disaster == "Treated (n = 20)"), 
  #              mapping = aes(label = str_replace_all(
  #                name, "Jefferson Davis", "Jefferson\nDavis")), 
  #             check_overlap = TRUE, size = 2.5, color = "black") +
  ##########
  # THEMES
  ##########
  theme_void(base_size = 14) +
  theme(legend.position = c(0.85,0.80),
        plot.subtitle = element_text(hjust = 0.20),
        plot.caption = ggtext::element_markdown(size = 11, hjust = 0),
        panel.spacing = unit(0, "cm"),
        plot.margin = margin(0,0,0,0,"cm")) +
  coord_sf(xlim = c(-93.90, -89.0),
           ylim = c(29.05, 32.9)) +
  ############
  # Labels
  ############
  labs(fill = "(%) Housing\nUnits Damaged", color = "LA Speaks Day Parishes",
       subtitle = "Parishes by Damage Level",
       caption = "<b>Black outlines</b> show parishes represented at LA Speaks Day,
       <br>
       which serve as the potentially treated group in this study.
       <br>
       <b>Grey shaded parishes</b> indicate parishes excluded from synthetic control group
       <br>
       due to missing outcome data (<i>Tensas, East Carroll, and DeSoto Parishes</i>).")+
  shadowtext::geom_shadowtext(
    data = mypoints, mapping = aes(x = x, y = y, label = name),
    #check_overlap = TRUE,
    bg.color = "white", bg.r = 0.3, hjust = 1, nudge_x = 0.25, fontface = "bold", 
    color = "black", size = 3) +
  ggspatial::annotation_scale(pad_y = unit(1, "cm"),bar_cols = c("darkgrey", "white"),
                              height = unit(0.25, "cm"), pad_x = unit(2, "cm")) +
  ggspatial::annotation_north_arrow(pad_y = unit(1, "cm"),
    height = unit(0.75, "cm"), width = unit(0.75, "cm"))

ggsave(g1, filename = "viz/figure_1.png", dpi = 500, width = 6, height = 6)
```


## 5.2 Average Recovery Outcomes by Policy Toolkit

```{r, message = FALSE, warning= FALSE}
library(tidyverse)
library(ggtext)
library(ggallin) # for psuedo-log transformation 
# (it's where you apply the log to both negative and positive values, 
# by transforming the negatives to positives first, and then adjusting)
# It makes it possible to plot extremely skewed distributions like net-migration

# Visualize it
viz <- read_csv("dataset_1996_2018.csv") %>%
  mutate(disaster = if_else(disaster == 1, "Disaster Affected", "Not Affected")) %>%
  mutate(net_income_in = net_income_in) %>%
# Get outcomes
  select(fips, year, 
         `Net Inmigration \n per 1,000 people` = net_migration_in, 
         `Net Income Inflow \n $ per 100 people` = net_income_in, 
         hard, soft, local, state, disaster) %>%
  pivot_longer(
    cols = -c(fips, year, hard, soft, local, state, disaster),
    names_to = "outcome",
    values_to = "value")

# Get the mean outcomes by policy  
vizpol = viz %>%
  filter(disaster == "Disaster Affected") %>%
  group_by(year, outcome) %>%
  summarize(
    Hard = mean(if_else(hard == 1, value, NA_real_), na.rm = TRUE),
    Soft = mean(if_else(soft == 1, value, NA_real_), na.rm = TRUE),
    State = mean(if_else(state == 1, value, NA_real_), na.rm = TRUE),
    Local = mean(if_else(local == 1, value, NA_real_), na.rm = TRUE)) %>%
  pivot_longer(
    cols = -c(year, outcome),
    names_to = "disaster",
    values_to = "value") %>%
  mutate(policy = factor(disaster, levels = c("Hard", "Soft", "Local", "State")))

mycolors <- c("#648FFF", "#785EF0","#DC267F","#FE6100","#FFB000")

mylines <- data.frame(x = c(1996, 2018),
                      y = c(0, 0))


mycolors <- c("#648FFF", "#785EF0","#DC267F","#FE6100","#FFB000")

g1 <- ggplot() +
  # Get solid axis line
  geom_line(data = mylines,
            mapping = aes(x = x, y = y),
            color = "#C5C6D0", linetype = "solid", size = 2, alpha = 0.5) +
  # Get a nice x-intercept
  geom_vline(xintercept = 2005, color = "#C5C6D0", linetype = "solid", size = 2, alpha = 0.5) +
  # Overlay tiny lines
  geom_line(data = viz, 
            mapping = aes(x = year, y = value, 
                          group = fips, alpha = disaster),
            color = "#9897A9", size = 0.5) +             #
  scale_alpha_manual(
    breaks = c("Disaster Affected", "Not Affected"),
    values = c(0.7, 0.1),
    guide = "none") +
  scale_y_continuous(trans = ggallin::pseudolog10_trans, 
                     breaks = c(-5000, -1000, -100, -30, -10, -3, 0, 3, 10, 30, 100, 1000, 5000)) +
  scale_x_continuous(
    breaks = seq(from = 1996, to = 2018, by = 2), 
    limits = c(1996, 2018), expand = expansion(add = 0)) +
  # Add in the mean outcomes over top
  geom_line(data = vizpol,
            mapping = aes(x = year, y = value, group = policy), color = "white", size = 2.5) +
  geom_line(data = vizpol,
            mapping = aes(x = year, y = value, group = policy, color = policy), size = 1.5) +
  # Customize color legend
  scale_color_manual(
    values = mycolors[c(1,3,2,5)],
    breaks = c("Hard", "Soft", "Local", "State"),
    labels = c("<img src='fa/tools.png' alt='tools' width='15' height='15'>
               <br>Hard",
               "<img src='fa/people-arrows.png' alt='tools' width='15' height='15'>
               <br>Soft",
               "<img src='fa/house-user.png' alt='tools' width='15' height='15'>
               <br>Local",
               "<img src='fa/flag.png' alt='tools' width='15' height='15'>
               <br>State"),
    guide = guide_legend(override.aes = list(size = 4))) +
  # Split into facets
  facet_wrap(~outcome, ncol = 1, scales = "free_y",strip.position = "right",
             labeller = labeller(outcome = c(
               "Net Income Inflow \n $ per 100 people" =  "<b>Net<br>
               Income<br>Inflow</b><br><i>$ per<br>100 people</i>", 
               "Net Inmigration \n per 1,000 people" = "<b>Net<br>
               Inmigration</b><br><i>per 1,000<br>people</i>"))) +
  # Add themes
  theme_minimal(base_size = 14) +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(color = "#C5C6D0", size = 0.1),
        panel.border = element_rect(fill = NA, color = "#C5C6D0"),
        panel.spacing.y = unit(0.5, "cm"),
        strip.text.x = ggtext::element_markdown(size = 12, hjust = 0),
        strip.text.y = ggtext::element_markdown(size = 12, hjust = 0, angle = 0),
        plot.caption = ggtext::element_markdown(size = 11, hjust = 0),
        plot.title = ggtext::element_markdown(size = 14, hjust = 0.5),
        axis.title.y = ggtext::element_markdown(size = 12, hjust = 0.5),
        axis.text.y = ggtext::element_markdown(size = 11, hjust = 1),
        legend.text = ggtext::element_markdown(size = 12, hjust = 0.5),
        legend.box.margin = margin(0,0,0,0,"cm"), legend.margin = margin(0,0,0,0,"cm"),
        legend.position = "bottom") +
  
  labs(x = NULL, color = "Policy Toolkit", 
       y = "Recovery Indicator<br><i>on a Bi-directional Log-scale</i>",
       title = "Average Recovery Trajectory by Policy Toolkit",
       caption = "<b>4 colored lines</b> chart mean recovery over time per policy toolkit.
       <br>
       <b>Light grey</b> shows all cases, while <b>dark grey</b> shows disaster affected cases.") 

ggsave(g1, filename = "viz/figure_3.png", dpi = 500, width = 8, height = 5)

remove(viz, vizpol)
```

## 5.3 Policy Toolkit Adoption by Parish & Region

```{r}
# Replication of Nick's excellent visual
myviz <- read_csv("dataset_1996_2018.csv") %>%
  # Zoom into disaster hit parishes, just during a year following the storms
  filter(disaster == 1, year == 2005) %>%
  select(fips, region, hard, soft, state, local) %>%
  pivot_longer(
    cols = -c(fips, region),
    names_to = "policy",
    values_to = "adopted") %>%
  # Recode parish names
  mutate(county = fips %>% recode_factor(
    "22001" = "Acadia",
    "22003" =	"Allen",
    "22011" =	"Beauregard",
    "22019" =	"Calcasieu",
    "22023"	= "Cameron",
    "22045" = "Iberia",
    "22051" = "Jefferson",
    "22053" = "Jefferson Davis",
    "22057" = "Lafourche",
    "22071" = "Orleans",
    "22075" = "Plaquemines",
    "22087" = "St. Bernard",
    "22089" = "St. Charles",
    "22093" = "St. James",
    "22101" = "St. Mary",
    "22103" = "St. Tammany",
    "22105" = "Tangipahoa",
    "22109" = "Terrebonne",
    "22113" = "Vermilion",
    "22115" = "Vernon",
    "22117" = "Washington")) %>%
  # now reverse the order of parish names
  mutate(county = factor(county, levels = levels(county) %>% rev())) %>%
  filter(adopted == 1) %>%
    # Recode Policy names
  mutate(policy = policy %>% recode_factor(
    "hard" = "<img src='fa/tools.png' alt='tools' width='25' height='25'>
    <br>
    <b>Hard</b>
    <br>
    <b><span style='color:#648FFF'>80%</span></b>",
    "soft" = "<img src='fa/people-arrows.png' alt='tools' width='25' height='25'>
    <br>
    <b>Soft</b>
    <br>
    <b><span style='color:#DC267F'>50%</span></b>",
    "local" = "<img src='fa/house-user.png' alt='tools' width='25' height='25'>
    <br>
    <b>Local</b>
    <br>
    <b><span style='color:#785EF0'>85%</span></b>",
    "state" = "<img src='fa/flag.png' alt='tools' width='25' height='25'>
    <br>
    <b>State</b>
    <br>
    <b><span style='color:#E69F00'>20%</span></b>")) %>%
    # Recode Region names
  mutate(region = region %>% recode_factor(
    "Southeast" = "<b>Southeast<br>Louisiana</b><br><i>(n = 12)</i>",
    "Southwest" = "<b>Southwest<br>Louisiana</b><br><i>(n = 8)</i>")) %>%
  # Get total parishes in each region
  group_by(region) %>%
  mutate(total = length(unique(county))) %>%
  # Count parishes that adopted each policy, for each region
  group_by(region, policy) %>%
  mutate(count = n(),
         # Divide by total
         percent = count / total, 
         percent = paste(round(percent * 100, 0), "%", sep = "")) %>%
  ungroup() %>%
  mutate(y = case_when(
    str_detect(region, "east") ~ "St. Charles",
    str_detect(region, "west") ~ "Iberia"))

#out %>%
#  group_by(policy) %>%
#  count() %>%
#  mutate(percent = n / length(unique(out$county)))

# Need to add in an extra row to show the zero 0%
mylabels <- expand_grid(policy = unique(myviz$policy), 
            y = unique(myviz$y),
            region = unique(myviz$region)) %>%
  filter(str_detect(region, "west"), 
         str_detect(policy, "State"),
         str_detect(y, "Iberia")) %>%
  mutate(percent = "0%",
         county = y) %>%
  bind_rows(myviz) %>%
  mutate(y = factor(y, levels = levels(myviz$county))) %>%
  mutate(county = factor(county, levels = levels(myviz$county)))  %>%
  select(policy, region, percent, y) %>%
  distinct()


g1 <- myviz %>%
  ggplot(mapping = aes(x = policy, y = county,  fill = policy)) +
  geom_tile(color = "white", height = 0.8, width = 0.9, alpha = 0.25) +
  shadowtext::geom_shadowtext(data = mylabels, mapping = aes(
    x = policy, y = y, label = percent, color = policy), bg.color = "black", 
    fill = NA, label.color = NA, fontface = "bold", size = 9, bg.r = 0.02) +
  facet_grid(rows = vars(region), scales = "free", space = "free") +
  theme_classic(base_size = 14) +
  scale_x_discrete(position = "top") +
  theme(
    panel.border = element_rect(fill = NA, color = "black"),
    plot.title = ggtext::element_markdown(hjust = 0.5),
    axis.text.x.top = ggtext::element_markdown(size = 12, hjust = 0.5),
    axis.ticks.x = element_blank(),
    axis.line.x = element_blank(),
    strip.background = element_blank(),
    strip.text.y = ggtext::element_markdown(size = 12, angle = 0, hjust = 0)) +
  labs(x = NULL, y= NULL, 
       title = "Policy Toolkit Adoption by **Parish** and **Region**") +
  scale_fill_manual(values = c("#648FFF","#DC267F","#785EF0","#FFB000"), guide = "none") +
  scale_color_manual(values = c("#648FFF","#DC267F","#785EF0","#FFB000"), guide = "none")

ggsave(g1, filename = "viz/figure_2.png", dpi = 500, width = 6, height = 5)
#?shadowtext::geom_shadowtext
```



Crosstabulation

```{r}
dat <- read_csv("dataset_1996_2018.csv") %>%
  mutate_at(vars(hard,soft,state,local),
            funs(as.character(.)))
dat %>%
  filter(year == 2005) %>%
  filter(disaster == 1) %>%
  infer::chisq_test(formula = region ~ hard)

dat %>%
  filter(year == 2005) %>%
  filter(disaster == 1) %>%
  infer::chisq_test(formula = region ~ soft)

dat %>%
  filter(year == 2005) %>%
  filter(disaster == 1) %>%
  infer::chisq_test(formula = region ~ state)

dat %>%
  filter(year == 2005) %>%
  filter(disaster == 1) %>%
  infer::chisq_test(formula = region ~ local)

# Each of these tests shows a relatively insignificant relationship between policy toolkit adoption and geography.
```



## 5.4 Difference of Means Visualization

```{r}
library(tidyverse)
library(infer)
library(ggtext)
# Visualize it
viz <- read_csv("dataset_1996_2018.csv") %>%
  mutate(disaster = if_else(disaster == 1, "Disaster Affected", "Not Affected")) %>%
  mutate(net_income_in = net_income_in) %>%
# Get outcomes
  select(fips, year, 
         net_migration_in, net_income_in,
         hard, soft, local, state, disaster) %>%
  pivot_longer(
    cols = -c(fips, year, hard, soft, local, state, disaster),
    names_to = "outcome",
    values_to = "value") %>%
  pivot_longer(
    cols = c(hard, soft, local, state),
    names_to = "policy",
    values_to = "predictor")

diff_of_means = function(mydata){
  # Get difference of means
   mydiff <- mydata %>%
    group_by(predictor) %>%
    summarize(mean = mean(value, na.rm = TRUE)) %>%
    ungroup() %>%
    pivot_wider(names_from = predictor, values_from = mean) %>%
    mutate(diff = `1` - `0`) %>%
    select(diff) %>%
    unlist()
  # Run statistical test
  infer::t_test(formula = value ~ predictor, order = c("1", "0"), 
                     alternative = "two.sided", conf_level = 0.95, x = mydata) %>%
  mutate(diff = mydiff) %>%
    return()
}  

out <- viz %>% 
  filter(disaster == "Disaster Affected") %>%
  mutate(pair = paste(outcome, policy, year, sep = "-")) %>%
  split(.$pair) %>%
  map_dfr(~diff_of_means(.), .id = "pair") %>%
  separate(col = pair, into = c("outcome", "predictor", "year"), sep = "-") %>%
  mutate(predictor = predictor %>% recode_factor(
    "hard" = "Hard", "soft" = "Soft", 
    "local" = "Local", "state" = "State")) %>%
  # Make it easier to get rid of extra labels
  mutate(year = if_else(
    condition = str_detect(outcome, pattern = "income"), 
    true = as.numeric(year) - 2000, 
    false = as.numeric(year) ),
    year = factor(year)) %>% 
  mutate(predictor = factor(predictor, level = c("Hard", "Soft", "Local", "State") %>% rev())) %>%
  mutate(outcome = outcome %>% recode_factor(
    "net_income_in" = "<b>Net<br>Income<br>Inflow</b><br><i>$ per<br>100 people</i>",
      "net_migration_in" = "<b>Net<br>Inmigration</b><br><i>per<br>1,000 people</i>"))
  
mycolors <- c("#648FFF", "#785EF0","#DC267F","#FE6100","#FFB000")


g1 <-out %>%
  ggplot(mapping = aes(x = year, y = predictor, fill = statistic, 
                       label = round(diff, 1))) +
  geom_tile(height = 0.72, color = "#C5C6D0", fill = "#C5C6D0") +
  geom_tile(height = 0.7) +
  geom_text(color = "#373737") +
  scale_fill_gradient2(
    low = mycolors[4], high = mycolors[1], mid = "white", midpoint = 0,
    limits = c(-3, 3),
    breaks = c(-3, -2, -1, 0, 1, 2, 3),
    guide = guide_colorsteps(barheight = 1, barwidth = 10, show.limits = TRUE)) +
  scale_x_discrete(breaks = c(5:18, 2005:2018),
                   labels = c(rep("", 14), 2005:2018)) +
  scale_y_discrete(
    breaks = c("Hard", "Soft", "Local", "State"),
    labels = c("<img src='fa/tools_grey.png' alt='tools' width='15' height='15'>   Hard",
               "<img src='fa/people-arrows_grey.png' alt='tools' width='15' height='15'>   Soft",
               "<img src='fa/house-user_grey.png' alt='tools' width='15' height='15'>   Local",
               "<img src='fa/flag_grey.png' alt='tools' width='15' height='15'>   State")) +
  facet_wrap(~outcome, ncol = 1, scales = "free", strip.position = "right") +
  theme_classic(base_size = 14) +
  theme(panel.border = element_blank(),
        panel.spacing.y = unit(0.0, "cm"),
        panel.spacing.x = unit(0.0, "cm"),
        plot.title = ggtext::element_markdown(size = 14, hjust = 0.5),
        axis.line.x.bottom = element_blank(), 
        strip.text.y = ggtext::element_markdown(size = 12, hjust = 0, angle = 0),
        strip.text.x = ggtext::element_markdown(size = 12, hjust = 0.5),
        strip.background = element_blank(),
        legend.title = ggtext::element_markdown(size = 12, hjust = 0.5),
        legend.text = ggtext::element_markdown(size = 10, hjust = 0),
        legend.position = "bottom", legend.box.margin = margin(0,0,0,0, "cm"),
        legend.margin = margin(0,0,0,0,"cm"),
        axis.ticks.x = element_blank(),
        axis.ticks.y = element_blank(),
        axis.text.y = ggtext::element_markdown(size = 11, hjust = 0),
        axis.line = element_blank(),
        plot.caption = ggtext::element_markdown(size = 10, hjust = 0)) +
  labs(y = "Policy Toolkit",
       x = NULL, fill = "<b>Standardized Difference of Means</b><br><i>(t-statistic)</i>",
       title = "<b>Difference in Average Annual Recovery by Policy</b>
       <br>
       <i>Among Disaster Struck Louisiana Parishes</i>",
       caption = "<b>Text</b> shows difference in mean parish recovery per policy per year. <b>Tile shading</b> shows strength of t-test's standardized test statistic. 
       <br>
       <b>Higher test statistics</b> (eg. +2 or -2) imply greater statistical significance; this better shows variation in significance<br>than the p-value, given each test's small sample size (n = 21).")

#<i class='fas fa-walking'></i>
#labels = c("<span color='#FE6100'><i>Negative</i></span>", 
#               "-2", "-1", "0", "+1", "+2", 
#               "<span color='#648FFF'><i>Positive</i></span>")
ggsave(g1, filename = "viz/figure_A1.png", dpi = 500, width = 9, height = 5)
```


# 6. Revised Models

We want to estimate treatment effects of policy toolkits while taking into account the differential effects of several different policy tools. However, a matching experiment by default is not really designed to allow for multiple treatments; since it builds a synthetic control group for comparison against.

## Run Models

```{r, eval = FALSE}
library(gsynth)

dat <- read_csv("dataset_1996_2018.csv") %>%
  mutate(time = as.numeric(year) - 2004) %>%
  group_by(fips) %>%
  mutate(lag1 = lag(net_migration_in, 1)) %>%
  ungroup() %>%
  # Get rid of first year, because it can't hold the lag
  filter(!is.na(lag1)) 

# Get parishes which adopted any specific policy
tr <- dat %>% filter(disaster == 1) %>% with(fips) %>% unique() %>% as.character()
# Get parishes unaffected / did not adopt a policy
ct <- dat %>% filter(year == 2010) %>% filter(disaster != 1) %>% with(fips) %>% unique() %>% as.character()

system.time(
  m1 <- dat %>%
    gsynth(
      formula = net_migration_in ~ disaster + lag1 +
        pop + pop_urban +
        # Vulnerability
        pop_women + pop_age_under_5_65_plus +
        pop_black + pop_hisp_lat +
        housing_vacant + 
        # Socioeconomics
        pop_poverty + unemployment + 
        # Social Capital
        crime_rate + voter_turnout + winning_party +
        # Damages
        damaged_percent + declarations,
      index = c("fips", "year"), na.rm = TRUE,
      force = "two-way", CV = TRUE, r = c(0,5),
      se = TRUE, inference = "parametric", 
      # Let's shoot for nboots == 1000
      nboots = 1000, parallel = TRUE, cores = 15, seed = 02139)
)
# Save overall synthetic control experiment to file
m1 %>% saveRDS("model/m.rds")

m1 <- read_rds("model/m.rds")

# When we compare this to a model testing JUST the hard policy cases
# against the control cases, we find that the coefficients are identical, 
# since the counterfactual model remains the same.
# only the avergage treatment effect changes, because the treated units change.
m2 <- dat %>%
    filter(fips %in% fips[hard == 1] | fips %in% ct) %>%
    gsynth(
      formula = net_migration_in ~ hard + lag1 +
        pop + pop_urban +
        # Vulnerability
        pop_women + pop_age_under_5_65_plus +
        pop_black + pop_hisp_lat +
        housing_vacant + 
        # Socioeconomics
        pop_poverty + unemployment + 
        # Social Capital
        crime_rate + voter_turnout + winning_party +
        # Damages
        damaged_percent + declarations,
      index = c("fips", "year"), na.rm = TRUE,
      force = "two-way", CV = TRUE, r = c(0,5),
      se = FALSE, inference = "parametric")

# All the same
m1$xi
m2$xi
m1$alpha.tr # same for shared cases
m2$alpha.tr # same for shared cases
m1$beta
m2$beta
m1$att.avg
m2$att.avg

#m1$eff[as.character(2005:2018), tr] %>% rowMeans() %>% mean()

# So, in essence, to test the effects of a bunch of different treatments, 
# we don't actually need a bunch of synthetic control models. We only need 1 per outcome.

remove(m1,m2)

# The quantities of interest can then be calculated for different treatment groups 
# from the same synthetic control model.


dat <- read_csv("dataset_1996_2018.csv") %>%
  mutate(time = as.numeric(year) - 2004) %>%
  group_by(fips) %>%
  mutate(lag1 = lag(net_income_in, 1)) %>%
  ungroup() %>%
  # Get rid of first year, because it can't hold the lag
  filter(!is.na(lag1)) 

system.time(
  i1 <- dat %>%
    gsynth(
      formula = net_income_in ~ disaster + lag1 +
        pop + pop_urban +
        # Vulnerability
        pop_women + pop_age_under_5_65_plus +
        pop_black + pop_hisp_lat +
        housing_vacant + 
        # Socioeconomics
        pop_poverty + unemployment + 
        # Social Capital
        crime_rate + voter_turnout + winning_party +
        # Damages
        damaged_percent + declarations,
      index = c("fips", "year"), na.rm = TRUE,
      force = "two-way", CV = TRUE, r = c(0,5),
      se = TRUE, inference = "parametric", 
      # Let's shoot for nboots == 1000
      nboots = 1000, parallel = TRUE, cores = 15, seed = 02139)
)
# Save overall synthetic control experiment to file
i1 %>% saveRDS("model/i.rds")

# When we compare this to a model testing JUST the hard policy cases
# against the control cases, we find that the coefficients are identical, 
# since the counterfactual model remains the same.
# only the avergage treatment effect changes, because the treated units change.
i2 <- dat %>%
    filter(fips %in% fips[hard == 1] | fips %in% ct) %>%
    gsynth(
      formula = net_income_in ~ hard + lag1 +
        pop + pop_urban +
        # Vulnerability
        pop_women + pop_age_under_5_65_plus +
        pop_black + pop_hisp_lat +
        housing_vacant + 
        # Socioeconomics
        pop_poverty + unemployment + 
        # Social Capital
        crime_rate + voter_turnout + winning_party +
        # Damages
        damaged_percent + declarations,
      index = c("fips", "year"), na.rm = TRUE,
      force = "two-way", CV = TRUE, r = c(0,5),
      se = FALSE, inference = "parametric")

i1$beta
i2$beta
i1$alpha.tr
i2$alpha.tr
i1$xi
i2$xi

rm(list=ls())
```

### Table A1

Let's make our model table!

```{r}
library(tidyverse)
library(gsynth)
library(broom)

get_table = function(model){
  print(model)[,1:5] %>% as_tibble(rownames = "term") %>%
    select(term, estimate = beta, se = 3, p_value = 6) %>%
    mutate(stars = gtools::stars.pval(p_value)) %>%
    mutate(label = paste(round(estimate, 2), stars, sep = ""),
           se = paste(" (", round(se, 2), ")", sep = ""),
           type = "Coefficients") %>%
    select(type, term, label, se) %>%
    bind_rows(
      tibble(type = "GOF", term = c("mu"), label = model$mu %>% round(2) %>% paste()),
      tibble(type = "GOF", term = c("MPSE"), label = model$MSPE %>% round(2) %>% paste()),
      tibble(type = "GOF", term = c("Sigma^2"), label = model$sigma2 %>% round(2) %>% paste()),
      tibble(type = "GOF", term = c("Fixed Effects"), label = "Two-Way" %>% paste()),
      tibble(type = "GOF", term = c("No. of Factors"), label = model$r.cv %>% paste()),
      
      tibble(type = "GOF", term = c("N. Parish-Years"), label = paste(dim(model$Y.dat)[1] * dim(model$Y.dat)[2])),
      tibble(type = "GOF", term = c("Control Units"), label = model$Nco %>% paste()),
      tibble(type = "GOF", term = c("Treated Units"), label = model$Ntr %>% paste()),
      tibble(type = "GOF", term = c("Years"), label = model$time %>% length() %>% paste()),
      tibble(type = "GOF", term = c("Treated Years"), label = c(2005:2018) %>% length() %>% paste())
    ) %>% return()
}


tab <- bind_rows(
  read_rds("model/m.rds") %>% get_table() %>% mutate(outcome = "mig"),
  read_rds("model/i.rds") %>% get_table() %>% mutate(outcome = "inc")
) %>%
  pivot_wider(id_cols = c(type, term), names_from = outcome, values_from = c(label, se)) %>%
  mutate_at(vars(se_mig, se_inc), ~if_else(is.na(.), true = "", false = .)) %>%
  select(type, Covariate = term, 
         `Estimate` = label_mig, `Std. Error` = se_mig, 
         `Estimate ` = label_inc, `Std. Error ` = se_inc) %>%
  select(-type) %>%
   mutate(Covariate = Covariate %>% recode(
    "lag1" = "Lagged Outcome (1-yr)",
    "pop" = "Population (1000s)",
    "pop_urban" = "% Urban",
    "pop_women" = "% Women",
    "pop_age_under_5_65_plus" = "% Vulnerable Age Groups",
    "pop_black" = "% Black",
    "pop_hisp_lat" = "% Hispanic / Latino",
    "housing_vacant" = "% Vacant Housing",
    "pop_poverty" = "% Under Poverty Line",
    "unemployment" = "Unemployment Rate",
    "crime_rate" = "Crime Rate",
    "voter_turnout" = "% Voter Turnout",
    "winning_party" = "% Vote for Winning Party",
    "damaged_percent" = "% Housing Units Damaged",
    "declarations" = "Disaster Declarations"))



tab %>% 
  kbl(caption = "Synthetic Control Models for Counterfactuals", booktabs = T, format = "latex") %>%
  add_header_above(c(" " = 1, "Net Income Inflow Rate (100s)" = 2, "Net In-Migration Rate (1000s)" = 2)) %>%
  add_header_above(c(" " = 1, "Beta Coefficients (Standard Error)" = 4)) %>%
  kable_styling(latex_options = c("hold_position")) %>%
  pack_rows("Coefficients", 1, 15, hline_before = FALSE, latex_gap_space = "2em") %>%
  pack_rows("Goodness of Fit", 16, 20, hline_before = FALSE, latex_gap_space = "2em") %>%
  pack_rows("Sample Size", 21, 25, hline_before = FALSE, latex_gap_space = "2em") %>%
  kableExtra::column_spec(1, border_right = TRUE) %>%
  kableExtra::column_spec(3, border_right = TRUE) %>%
  kableExtra::footnote(
    general = "Statistical Significance: *** p < 0.001, ** p < 0.01, * p < 0.05, . p < 0.10. Std. Errors and p-values based on 1000 bootstrapped resamples.",
    threeparttable = TRUE) %>%
  c() %>%
  # MUST MANUALLY ADD \textless by find and replace. Sorry!
  str_replace_all(pattern = "p < ", replacement = "p textless ") %>%
  cat(sep = "\n", fill = FALSE) 
```



## Extract Data

```{r}
dat <- read_csv("dataset_1996_2018.csv") %>%
  select(year, fips, disaster, hard, soft, state, local) %>%
  # Cut year not observed due to lag
  filter(year != "1996")

# Get parishes which adopted any specific policy
tr <- dat %>% filter(disaster == 1) %>% with(fips) %>% unique() %>% as.character()
# Get parishes unaffected / did not adopt a policy
ct <- dat %>% filter(year == 2010) %>% filter(disaster != 1) %>% with(fips) %>% unique() %>% as.character()

# Load in migration synthetic control model
m <- read_rds("model/m.rds")
# Next, we're going to extra our data into one large data.frame, so that id = 0 is the observed and id = 1:1000 are the bootrstrapped observations
1:1000 %>% map_dfr(~m$eff.boot[, tr, .x] %>% as_tibble(rownames = "time"), .id = "id") %>% 
  bind_rows(as_tibble(m$eff[,tr], rownames = "time") %>% mutate(id = "0")) %>%
  pivot_longer(cols = -c(id, time), names_to = "case", values_to = "boot") %>%
  mutate(time = if_else(as.numeric(time) < 1900, true = as.numeric(time) + 2004, false = as.numeric(time))) %>%
  # Finally, let's join in the extra policy toolkit data, which will be helpful to us later
  left_join(by = c("time" = "year", "case" = "fips"), 
            y = dat %>% mutate(fips = as.character(fips))) %>%
  saveRDS("model/mdat.rds")
remove(m)

# Load in migration synthetic control model
i <- read_rds("model/i.rds")

1:1000 %>% map_dfr(~i$eff.boot[, tr, .x] %>% as_tibble(rownames = "time"), .id = "id") %>% 
  bind_rows(as_tibble(i$eff[,tr], rownames = "time") %>% mutate(id = "0")) %>%
  pivot_longer(cols = -c(id, time), names_to = "case", values_to = "boot") %>%
  mutate(time = if_else(as.numeric(time) < 1900, true = as.numeric(time) + 2004, false = as.numeric(time))) %>%
  # Finally, let's join in the extra policy toolkit data, which will be helpful to us later
  left_join(by = c("time" = "year", "case" = "fips"), 
            y = dat %>% mutate(fips = as.character(fips))) %>%
  saveRDS("model/idat.rds")
remove(i)
rm(list=ls())
```

```{r}
# Finally, let's stop up on some stats and functions for later.

# Load in migration synthetic control model
dat <- read_csv("dataset_1996_2018.csv")
# vector of treated time periods
t <- as.character(2005:2018)
# Get parishes unaffected / did not adopt a policy
ct <- dat %>% filter(year == 2010) %>% filter(disaster != 1) %>% with(fips) %>% unique() %>% as.character()

# Get parishes hit by disaster, which adopted ANY type of policy.
tr.disaster <- dat %>% filter(year == 2010 & disaster == 1) %>% with(fips) %>% unique() %>% as.character()
# Get parishes which adopted a hard policy
tr.hard <- dat %>% filter(hard == 1) %>% with(fips) %>% unique() %>% as.character()
# Get parishes which adopted a soft policy
tr.soft <- dat %>% filter(soft == 1) %>% with(fips) %>% unique() %>% as.character()
# Get parishes which adopted a state policy
tr.state <- dat %>% filter(state == 1) %>% with(fips) %>% unique() %>% as.character()
# Get parishes which adopted a local policy
tr.local <- dat %>% filter(local == 1) %>% with(fips) %>% unique() %>% as.character()

# Last, Let's write a quick function to calculate quantities of interest for any set of bootstrapped results
get_stat = function(data, qi){
  data %>%
  rename(qi = qi) %>%
  summarize(
    estimate = qi[id == 0],
    se = sd(qi[id != 0]),
    lower_ci = quantile(qi[id != 0], probs = 0.025),
    upper_ci = quantile(qi[id != 0], probs = 0.975),
    p_value = 2*case_when(
      estimate > 0 ~ sum(qi[id != 0] < 0),
      estimate < 0 ~ sum(qi[id != 0] > 0))/n()) %>%
    return()
}

get_att = function(data, tr, timing){
  data %>% 
    # Zoom into the treated period
    filter(time %in% timing) %>%
    # Zoom into the treated cases
    filter(case %in% tr) %>%
    # Get average annual effect for this set of cases
    group_by(id, time) %>%
    # Let's average over cases
    summarize(att = mean(boot)) %>%
    ungroup() %>%
    # Get average annual treatment effect over time
    group_by(id) %>%
    summarize(att.avg = mean(att)) %>%
    ungroup() %>%
    return()
}

get_fd = function(data, x1, x2){
  data %>%
    rename(
      x1 = x1,
      x2 = x2) %>%
    mutate(fd = x2 - x1) %>%
    get_stat(qi = "fd") %>%
    mutate(type = paste(x2, "-", x1, sep = " ")) %>%
    return()
}

save(t, ct, tr.disaster, tr.hard, tr.soft, tr.state, tr.local, get_stat, get_att, get_fd, file = "model/metadata.RData")
rm(list=ls())
```

## Residualize Policy/Disaster Controls

Normally, a synthetic control experiment creates an as-if perfect control group against which to compare the treated variable, but in this case, there's still just a little issue, which is that we need to adjust for multiple treatments. The treatment group was subject to several different treatments, so we're going to add terms to the counterfactual model to parse out the effect of these extra treatments. For any one treatment, we need to also parse out 4 out of the following 5 treatments: the effect of the disaster, hard policies, soft policies, state policies, and local policies. Since any case may have adopted up to all 5 of these, we need to flexibly account for them.

To do so, let's get our residuals from the original counterfactual model, saved in ```boot```, where ```id == 0``` is the original observed values and ```id %in% 1:1000``` are the bootstrapped models' residuals.

```{r}
library(tidyverse)
library(broom)
load("model/metadata.RData")

get_res = function(m){
  m <- m %>%
    mutate(time = time - 2004)
  
  # To estimate the ACTUAL treatment effect of disasters/hard/soft/state/local
  bind_rows(
    m %>% group_by(id) %>%
      mutate(delta = lm(formula = boot ~ hard * time + soft * time + state * time + local * time + 
                          factor(time) + factor(case) - time - 1)$residual,
             tr = "disaster"),
    m %>% group_by(id) %>%
      mutate(delta = lm(formula = boot ~ disaster + soft * time + state  * time + local * time + 
                          factor(time)  + factor(case) - time - 1)$residual,
             tr = "hard"),
    m %>% group_by(id) %>%
      mutate(delta = lm(formula = boot ~ disaster + hard * time + state * time + local * time + 
                          factor(time) + factor(case) - time - 1)$residual,
             tr = "soft"),
    m %>% group_by(id) %>%
      mutate(delta = lm(formula = boot ~ disaster + hard * time + soft * time + local * time + 
                          factor(time) + factor(case) - time - 1)$residual,
             tr = "state"),
    m %>% group_by(id) %>%
      mutate(delta = lm(formula = boot ~ disaster +  hard * time + soft * time + state * time + 
                          factor(time) + factor(case) - time - 1)$residual,
             tr = "local")
  ) %>%
    # Pivot out into a wide matrix
    pivot_wider(id_cols = c(id, time, case), names_from = tr, names_prefix = "d_", 
                values_from = delta, values_fill = list(delta = 0)) %>% 
    # Join back in indicator for which case is which, and the original estimate
    left_join(by = c("id", "time", "case"), y = m %>% select(id, time, case, boot, disaster:local))  %>%
    # Arrange observations
    select(id, time, case, boot, d_disaster, disaster, d_hard, hard, d_soft, soft, d_state, state, d_local, local) %>%
    mutate(time = time + 2004) %>%
    return()
}


# Get observed treatment
y <- read_rds("model/m.rds")$Y.tr %>% 
  as_tibble(rownames = "time") %>%
  pivot_longer(cols = -c(time), names_to = "case", values_to = "ytr") %>%
  mutate(time = as.numeric(time))

# Save output as mres, for model residuals
read_rds("model/mdat.rds") %>%
  get_res() %>%
  left_join(by = c("time", "case"), y = y) %>%
  saveRDS("model/mres.rds")


# Get observed treatment
y <- read_rds("model/i.rds")$Y.tr %>% 
  as_tibble(rownames = "time") %>%
  pivot_longer(cols = -c(time), names_to = "case", values_to = "ytr") %>%
  mutate(time = as.numeric(time))

# Repeat for net income flow
read_rds("model/idat.rds") %>%
  get_res() %>%
  left_join(by = c("time", "case"), y = y) %>%
  saveRDS("model/ires.rds")

rm(list = ls())

```



### Tables A2-A3

Next, let's get the residualizer models.

```{r}
library(tidyverse)
library(broom)
library(texreg)


get_p = function(data, qi){
  data %>% 
    rename(qi = estimate) %>% 
    summarize(
      estimate = qi[id == 0],
      se = sd(qi[id != 0]),
      statistic = estimate / se,
      p_value = 2*pt(abs(statistic), df = unique(df), lower.tail = FALSE)) %>%
    return()
}

get_table = function(data, myformula){
  
# Get original model
m <- data %>% 
  filter(id == 0) %>% 
  lm(formula = myformula, data = .)

# Get model stats
g <- m %>%
  glance() %>%
  select(r.squared, sigma, df.residual, nobs) %>%
  pivot_longer(cols = -c(), names_to = "term", values_to = "estimate")

# Get bootstrapped stats
tab <- data %>% 
  split(.$id) %>%
  map(~lm(formula = myformula, data = .)) %>%
  map_dfr(~tidy(.), .id = "id") %>%
  mutate(df = m$df.residual) %>%
  group_by(term) %>%
  get_p(qi = "estimate")

# Return all
bind_rows(tab, g) %>%
  return()
}



dis <- boot ~ hard * time + soft * time + state * time + local * time + 
                      factor(time) + factor(case) - time - 1
hard <- boot ~ disaster  + soft * time + state  * time + local * time + 
                      factor(time)  + factor(case) - time - 1
soft <- boot ~ disaster  + hard * time + state * time + local * time + 
                      factor(time) + factor(case) - time - 1
state <- boot ~ disaster  + hard * time + soft * time + local * time + 
                      factor(time) + factor(case) - time - 1
local <- boot ~ disaster  +  hard * time + soft * time + state * time + 
                      factor(time) + factor(case) - time - 1

dat <- read_rds("model/mdat.rds")

d1 <- bind_rows(
  get_table(dat, myformula = dis),
  get_table(dat, myformula = hard),
  get_table(dat, myformula = soft),
  get_table(dat, myformula = state),
  get_table(dat, myformula = local),
  .id = "type") %>%
  mutate(type = type %>% recode("1" = "disaster", "2" = "hard", "3" = "soft", "4" = "state", "5" = "local"))

dat <- read_rds("model/idat.rds")

d2 <- bind_rows(
  get_table(dat, myformula = dis),
  get_table(dat, myformula = hard),
  get_table(dat, myformula = soft),
  get_table(dat, myformula = state),
  get_table(dat, myformula = local),
  .id = "type") %>%
  mutate(type = type %>% recode("1" = "disaster", "2" = "hard", "3" = "soft", "4" = "state", "5" = "local"))

bind_rows(d1 %>% mutate(outcome = "net_migration_in"),
          d2 %>% mutate(outcome = "net_income_in")) %>%
  saveRDS("model/residualizer.rds")

# Get goodness of fit


rm(list=ls())
```

```{r}
dat <- read_rds("model/
                residualizer.rds") %>%
 select(type, outcome, term, estimate, se, p_value = 6) %>%
  filter(type != "disaster") %>%
    mutate(stars = gtools::stars.pval(p_value)) %>%
    mutate(label = paste(round(estimate, 2), stars, sep = ""),
           se = paste(" (", round(se, 2), ")", sep = "")) %>%
  filter(str_detect(term, "factor", negate = TRUE))  %>%
  mutate(outcome = outcome %>% recode("net_migration_in" = "mig", "net_income_in" = "inc")) %>%
  pivot_wider(id_cols = c(outcome, term), names_from = type, values_from = c(label, se)) %>%
  mutate(term = term %>% recode_factor(
    "hard:time" = "Hard x Year",
    "soft:time" = "Soft x Year",
    "time:soft" = "Soft x Year",
    "time:local" = "Local x Year",
    "time:state" = "State x Year",
    "hard" = "Hard",
    "soft" = "Soft",
    "local" = "Local",
    "state" = "State",
    "disaster" = "Disaster",
    "r.squared" = "R2",
    "sigma" = "Sigma",
    "df.residual" = "Residual df",
    "nobs" = "N"),
    order = as.numeric(term)) %>%
  arrange(desc(outcome), order) %>%
  mutate_at(vars(label_hard, label_soft, label_local, label_state,
                 se_hard, se_soft, se_local, se_state), list(~if_else(is.na(.), " ", .)))  %>%
  group_by(outcome, term, order) %>%
  summarize_at(
    vars(label_hard, label_soft, label_local, label_state,
                 se_hard, se_soft, se_local, se_state),
    list(~unique(.) %>% paste(collapse = " "))) %>%
  ungroup() %>%
  arrange(desc(outcome)) %>%
  mutate_at(vars(label_hard, label_soft, label_local, label_state,
                 se_hard, se_soft, se_local, se_state), 
            list(~str_remove(., "[(]NA[)]"))) %>%
    select(outcome, term, `Estimate` = label_hard, `(SE)` = se_hard, 
         `Estimate ` = label_soft, `(SE) ` = se_soft, 
         `Estimate  ` = label_local, `(SE)  ` = se_local, 
         `Estimate   ` = label_state, `(SE)   ` = se_state) 


dat %>% 
  filter(outcome == "mig") %>%
  select(-outcome) %>%
  kbl(caption = "OLS Difference-in-Differences Model for Extracting Residual Treatment Effects",
      booktabs = T, format = "latex") %>%
  add_header_above(c(" " = 1, "Hard Policy" = 2, "Soft Policy" = 2, 
                     "Local Policy" = 2, "State Policy" = 2)) %>%
  add_header_above(c(" " = 1, "Dependent Variable: Counterfactual Net-In-migration Rate (1000s) for Specified Policy Treatment" = 8)) %>%
  kable_styling(latex_options = c("hold_position")) %>%
  pack_rows("Confounders over Time", 1, 4, hline_before = FALSE, latex_gap_space = "2em") %>%
  pack_rows("Direct Counfounders", 5, 9, hline_before = FALSE, latex_gap_space = "2em") %>%
  pack_rows("Goodness of Fit", 10, 13, hline_before = FALSE, latex_gap_space = "2em") %>%
  kableExtra::column_spec(1, border_right = TRUE) %>%
  kableExtra::footnote(
    general = c("Statistical Significance: *** p < 0.001, ** p < 0.01, * p < 0.05, . p < 0.10. Std. Errors and p-values based on 1000 bootstrapped resamples.",
                "All models contain two-way fixed effects by parish and year."),
    threeparttable = TRUE) %>%
  c() %>%
  # MUST MANUALLY ADD \textless by find and replace. Sorry!
  str_replace_all(pattern = "p < ", replacement = "p textless ") %>%
  cat(sep = "\n", fill = FALSE) 

dat %>% 
  filter(outcome == "inc") %>%
  select(-outcome) %>%
  kbl(caption = "OLS Difference-in-Differences Model for Extracting Residual Treatment Effects",
      booktabs = T, format = "latex") %>%
  add_header_above(c(" " = 1, "Hard Policy" = 2, "Soft Policy" = 2, 
                     "Local Policy" = 2, "State Policy" = 2)) %>%
  add_header_above(c(" " = 1, "Dependent Variable: Counterfactual Net-Income Inflow Rate (100s) for Specified Policy Treatment" = 8)) %>%
  kable_styling(latex_options = c("hold_position")) %>%
  pack_rows("Confounders over Time", 1, 4, hline_before = FALSE, latex_gap_space = "2em") %>%
  pack_rows("Direct Counfounders", 5, 9, hline_before = FALSE, latex_gap_space = "2em") %>%
  pack_rows("Goodness of Fit", 10, 13, hline_before = FALSE, latex_gap_space = "2em") %>%
  kableExtra::column_spec(1, border_right = TRUE) %>%
  kableExtra::footnote(
    general = c("Statistical Significance: *** p < 0.001, ** p < 0.01, * p < 0.05, . p < 0.10. Std. Errors and p-values based on 1000 bootstrapped resamples.",
                "All models contain two-way fixed effects by parish and year."),
    threeparttable = TRUE) %>%
  c() %>%
  # MUST MANUALLY ADD \textless by find and replace. Sorry!
  str_replace_all(pattern = "p < ", replacement = "p textless ") %>%
  cat(sep = "\n", fill = FALSE) 


```




## Estimates

Let's get average annual treatment effects, with bootstrapped cases.

```{r}

m <- read_rds("model/mres.rds")

# They're additive. It makes no difference what order you do it in.
m %>% 
  filter(case %in% tr.soft) %>%
  filter(id == 0) %>%
  group_by(id,time) %>%
  summarize(d_soft = mean(d_soft)) %>%
  ungroup() %>%
  summary()

m %>% 
  filter(case %in% tr.soft) %>%
  filter(id == 0) %>%
  group_by(id,time) %>%
  summarize(d_soft = mean(ytr) - mean(ytr - d_soft)) %>%
  ungroup() %>%
  summary()

```


### Tidy Yearly Cases

```{r}
load("model/metadata.RData")

# Get for each treatment the residual effects

# SOFT - big dip in first year, gets better after
get_long = function(data, var, tr){
  data %>%
    filter(case %in% tr) %>%
    mutate(yct = ytr - !!sym(var)) %>%
    select(id, time, case, ytr, yct) %>%
    # For the treatment group, 
    # get the average treatment and counterfactual outcomes 
    # per time slice, per bootstrap
    group_by(id, time) %>%
    summarize(ytr = mean(ytr),
              yct = mean(yct)) %>%
    ungroup() %>%
    # And pivot into a long data.frame
    pivot_longer(cols = c(ytr, yct), names_to = "group", values_to = "y") %>%
    return()
}

tr.wa <- "22117" 
#22117 # washington, soft
#22117 # washington, local
tr.pla <- "22075"
# 22075# plaquemines, hard
tr.st <- "22087" 
# 22087 # st. bernard # soft
# 22087 # st. bernard # local 
# 22087 # st. bernard # hard

# Get for migration
m <- read_rds("model/mres.rds")

d1 <- bind_rows(
  get_long(m, var = "d_soft", tr = tr.wa),
  get_long(m, var = "d_local", tr = tr.wa),
  get_long(m, var = "d_hard", tr = tr.pla),
  
  get_long(m, var = "d_soft", tr = tr.st),
  get_long(m, var = "d_local", tr = tr.st),
  get_long(m, var = "d_hard", tr = tr.st),
  .id = "type") %>% 
  mutate(type = type %>% recode(
    "1" = "Washington-Soft Policy",
    "2" = "Washington-Local Policy",
    "3" = "plaquemines-Hard Policy",
    "4" = "St. Bernard-Soft Policy",
    "5" = "St. Bernard-Local Policy",
    "6" = "St. Bernard-Hard Policy"))

# Repeat for income
m <- read_rds("model/ires.rds")
d2 <- bind_rows(
  get_long(m, var = "d_soft", tr = tr.wa),
  get_long(m, var = "d_local", tr = tr.wa),
  get_long(m, var = "d_hard", tr = tr.pla),
  
  get_long(m, var = "d_soft", tr = tr.st),
  get_long(m, var = "d_local", tr = tr.st),
  get_long(m, var = "d_hard", tr = tr.st),
  .id = "type") %>% 
  mutate(type = type %>% recode(
    "1" = "Washington-Soft Policy",
    "2" = "Washington-Local Policy",
    "3" = "plaquemines-Hard Policy",
    "4" = "St. Bernard-Soft Policy",
    "5" = "St. Bernard-Local Policy",
    "6" = "St. Bernard-Hard Policy"))

bind_rows(d1 %>% mutate(outcome = "net_migration_in"),
          d2 %>% mutate(outcome = "net_income_in")) %>%
  separate(col = type, into = c("name", "treatment"), sep = "-") %>%

  saveRDS("model/long_cases.rds")


rm(list = ls())
```



### Tidy Yearly Lines

```{r}
load("model/metadata.RData")

# Get for each treatment the residual effects

# SOFT - big dip in first year, gets better after
get_long = function(data, var, tr){
  data %>%
    filter(case %in% tr) %>%
    mutate(yct = ytr - !!sym(var)) %>%
    select(id, time, case, ytr, yct) %>%
    # For the treatment group, 
    # get the average treatment and counterfactual outcomes 
    # per time slice, per bootstrap
    group_by(id, time) %>%
    summarize(ytr = mean(ytr),
              yct = mean(yct)) %>%
    ungroup() %>%
    # And pivot into a long data.frame
    pivot_longer(cols = c(ytr, yct), names_to = "group", values_to = "y") %>%
    return()
}


# Get for migration
m <- read_rds("model/mres.rds")
d1 <- bind_rows(
  get_long(m, var = "d_soft", tr = tr.soft),
  get_long(m, var = "d_hard", tr = tr.hard),
  get_long(m, var = "d_state", tr = tr.state),
  get_long(m, var = "d_local", tr = tr.local),
  .id = "type") %>% 
  mutate(type = type %>% recode("1" = "soft", "2" = "hard",
                                "3" = "state", "4" = "local"))

# Repeat for income
m <- read_rds("model/ires.rds")
d2 <- bind_rows(
  get_long(m, var = "d_soft", tr = tr.soft),
  get_long(m, var = "d_hard", tr = tr.hard),
  get_long(m, var = "d_state", tr = tr.state),
  get_long(m, var = "d_local", tr = tr.local),
  .id = "type") %>% 
  mutate(type = type %>% recode("1" = "soft", "2" = "hard",
                                "3" = "state", "4" = "local")) 

bind_rows(d1 %>% mutate(outcome = "net_migration_in"),
          d2 %>% mutate(outcome = "net_income_in")) %>%
  saveRDS("model/long.rds")


rm(list = ls())
```

### Yearly Bands

```{r}
load("model/metadata.RData")

read_rds("model/long.rds") %>%
  group_by(time, outcome, group, type) %>%
  # Get confidence intervals (but cut the p-value, since it doesn't mean anything here)
  get_stat(qi = "y") %>% select(-p_value) %>%
  ungroup() %>%
  saveRDS("model/bands.rds")

read_rds("model/bands.rds") %>%
  ggplot(mapping = aes(x = time, y = estimate, ymin = lower_ci, ymax = upper_ci, color = group, fill =group)) +
  geom_ribbon(alpha = 0.25) +
  geom_line() +
  facet_grid(outcome ~ type) +
  scale_y_continuous(trans = ggallin::pseudolog10_trans)
```

### Yearly Effects

```{r}
read_rds("model/long.rds") %>%
  pivot_wider(id_cols = c(outcome, type, time, id), names_from = group, values_from = y) %>%
  mutate(eff = ytr - yct) %>%
  saveRDS("model/eff.rds")

read_rds("model/eff.rds") %>%
  group_by(outcome, type, time) %>%
  get_stat(qi = "eff") %>%
  ungroup() %>%
  saveRDS("model/eff_bands.rds")
```

### Cumulative Effects

```{r}
load("model/metadata.RData")
get_cumulative = function(data, i){
  bind_rows(
    data %>%
      mutate(boot = d_soft) %>%
      select(id, time, case, boot) %>%
      get_att(tr = tr.soft, timing = t[c(1:i)]) %>%
      get_stat(qi = "att.avg"),
    
    data %>%
      mutate(boot = d_hard) %>%
      select(id, time, case, boot) %>%
      get_att(tr = tr.hard, timing = t[c(1:i)]) %>%
      get_stat(qi = "att.avg"),
    
    data %>%
      mutate(boot = d_state) %>%
      select(id, time, case, boot) %>%
      get_att(tr = tr.state, timing = t[c(1:i)]) %>%
      get_stat(qi = "att.avg"),
    
    data %>%
      mutate(boot = d_local) %>%
      select(id, time, case, boot) %>%
      get_att(tr = tr.local, timing = t[c(1:i)]) %>%
      get_stat(qi = "att.avg"),
    
    .id = "type") %>% 
    mutate(type = type %>% recode("1" = "soft", "2" = "hard", "3" = "state", "4" = "local")) %>%
    return()
}

m <- read_rds("model/mres.rds")
d1 <- 1:length(t) %>%
  map_dfr(~get_cumulative(m, i = .), .id = "time")

m <- read_rds("model/ires.rds")
d2 <- 1:length(t) %>%
  map_dfr(~get_cumulative(m, i = .), .id = "time")

bind_rows(d1 %>% mutate(outcome = "net_migration_in"),
          d2 %>% mutate(outcome = "net_income_in")) %>%
  saveRDS("model/cumulative_att.rds")

rm(list=ls())

read_rds("model/cumulative_att.rds")  %>%
  ggplot(mapping = aes(x = as.numeric(time), y = estimate, ymin = lower_ci, ymax = upper_ci)) +
  geom_col() +
  geom_linerange() +
  facet_grid(outcome~type, scales = "free_y")
```

### Period Effects

```{r}
load("model/metadata.RData")

get_summary = function(data, i){
  bind_rows(
    data %>%
      mutate(boot = d_soft) %>%
      select(id, time, case, boot) %>%
      get_att(tr = tr.soft, timing = i) %>%
      get_stat(qi = "att.avg"),
    
    data %>%
      mutate(boot = d_hard) %>%
      select(id, time, case, boot) %>%
      get_att(tr = tr.hard, timing = i) %>%
      get_stat(qi = "att.avg"),
    
    data %>%
      mutate(boot = d_state) %>%
      select(id, time, case, boot) %>%
      get_att(tr = tr.state, timing = i) %>%
      get_stat(qi = "att.avg"),
    
    data %>%
      mutate(boot = d_local) %>%
      select(id, time, case, boot) %>%
      get_att(tr = tr.local, timing = i) %>%
      get_stat(qi = "att.avg"),
    
    .id = "type") %>% 
    mutate(type = type %>% recode("1" = "soft", "2" = "hard", "3" = "state", "4" = "local")) %>%
    return()
}


# First 2 years versus next 13

myrange <- tibble(group  = "3-14", timing = c(2007:2018) %>% as.character()) %>%
  bind_rows(tibble(group = "1-2", timing = c("2005", "2006")))

# This time, we're going to get the average effect every three years
m <- read_rds("model/mres.rds")
d1 <- myrange %>%
  split(.$group) %>% 
  map_dfr(~get_summary(m, i = .$timing), .id = "time")

# This time, we're going to get the average effect every three years
m <- read_rds("model/ires.rds")
d2 <- myrange %>%
  split(.$group) %>% 
  map_dfr(~get_summary(m, i = .$timing), .id = "time")

bind_rows(d1 %>% mutate(outcome = "net_migration_in"),
          d2 %>% mutate(outcome = "net_income_in")) %>%
  saveRDS("model/period_att2_14.rds")

```

### Period Effect Bootstrap

```{r}

load("model/metadata.RData")

get_summary = function(data, i){
  bind_rows(
    data %>%
      mutate(boot = d_soft) %>%
      select(id, time, case, boot) %>%
      get_att(tr = tr.soft, timing = i),
    
    data %>%
      mutate(boot = d_hard) %>%
      select(id, time, case, boot) %>%
      get_att(tr = tr.hard, timing = i),
    
    data %>%
      mutate(boot = d_state) %>%
      select(id, time, case, boot) %>%
      get_att(tr = tr.state, timing = i),
    
    data %>%
      mutate(boot = d_local) %>%
      select(id, time, case, boot) %>%
      get_att(tr = tr.local, timing = i),
    
    .id = "type") %>% 
    mutate(type = type %>% recode("1" = "soft", "2" = "hard", "3" = "state", "4" = "local")) %>%
    return()
}
# First 2 years versus next 13

myrange <- tibble(group  = "3-14", timing = c(2007:2018) %>% as.character()) %>%
  bind_rows(tibble(group = "1-2", timing = c("2005", "2006")))

# This time, we're going to get the average effect every three years
m <- read_rds("model/mres.rds")
d1 <- myrange %>%
  split(.$group) %>% 
  map_dfr(~get_summary(m, i = .$timing), .id = "time")

# This time, we're going to get the average effect every three years
m <- read_rds("model/ires.rds")
d2 <- myrange %>%
  split(.$group) %>% 
  map_dfr(~get_summary(m, i = .$timing), .id = "time")

bind_rows(d1 %>% mutate(outcome = "net_migration_in"),
          d2 %>% mutate(outcome = "net_income_in")) %>%
  saveRDS("model/period_att2_14_boot.rds")

rm(list=ls())
```


# 7. Visualization


## Figure 5

### Basic Concept

```{r}
read_rds("model/period_att2_14.rds") %>%
  mutate(time = str_remove(time, ".*-") %>% as.numeric()) %>%
#  filter(type == "soft") %>%
  ggplot(mapping = aes(x = factor(time), y = estimate, ymin = lower_ci, ymax = upper_ci, 
                       group = time, fill = factor(time) )) +
  geom_crossbar(position = position_dodge2(width = 0.5), alpha = 0.5)  +
  coord_flip() +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
  facet_grid(outcome~type, scales = "free_x")

read_rds("model/period_att2_14.rds") %>%
  mutate(time = str_remove(time, ".*-") %>% as.numeric()) %>%
#  filter(type == "soft") %>%
  ggplot(mapping = aes(x = type, y = estimate, ymin = lower_ci, ymax = upper_ci, 
                       group = time, fill = factor(time) )) +
  geom_crossbar(position = position_dodge2(width = 0.5), alpha = 0.5)  +
  coord_flip() +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
  facet_grid(as.factor(time)~outcome, scales = "free_x")
```

### Table A4

```{r}
library(knitr)
library(kableExtra)


tab <- bind_rows(
  read_rds("model/cumulative_att.rds") %>%
    filter(time == 14) %>%
    mutate(time = "Overall"),
  read_rds("model/period_att2_14.rds")) %>%
  mutate(type = type %>% recode_factor("hard" = "Hard", "soft" = "Soft", "local" = "Local", "state" = "State"),
         outcome = outcome %>% recode("net_income_in" = "inc", 
                                      "net_migration_in" = "mig"),
         time = time %>% recode_factor("Overall" = "2005-2018", "1-2" = "2005-2006", "3-14" = "2007-2018")) %>%
  mutate(order = type %>% as.numeric()) %>%
  arrange(time, outcome, order) %>%
  mutate(label = paste(  round(estimate, 2), gtools::stars.pval(p_value), sep = ""),
         ci = paste( "[", round(lower_ci, 2), " to ", round(upper_ci, 2), "]", sep = ""),
         p_value = if_else(p_value < 0.001, "p < 0.001", paste("p = ", round(p_value, 3), sep = ""))) %>%
  select(time, outcome, type, label, ci, p_value) %>%
  pivot_wider(id_cols = c(time, type), names_from = outcome, values_from = c(label, ci, p_value))  %>%
  select(Policy = type, Estimate = label_inc, `95% CI` = ci_inc, `p-value` = p_value_inc, 
         `Estimate ` = label_mig, `95% CI ` = ci_mig, `p-value ` = p_value_mig)


tab %>% kbl(caption = "Average Treatment Effects by Period", booktabs = T, format = "latex") %>%
  add_header_above(c(" " = 1, "Net Income Inflow Rate (100s)" = 3, "Net In-Migration Rate (1000s)" = 3)) %>%
  add_header_above(c(" " = 1, "Average Period Treatment Effects" = 6)) %>%
  kable_styling(latex_options = c("hold_position"))  %>%
  pack_rows("Overall Effects (2005-2018)", 1, 4, hline_before = FALSE, latex_gap_space = "2em") %>%
  pack_rows("Short Term Effects (2005-2006)", 5, 8, hline_before = FALSE, latex_gap_space = "2em") %>%
  pack_rows("Long Term Effects (2007-2018)", 9, 12, hline_before = FALSE, latex_gap_space = "2em") %>%
  kableExtra::column_spec(1, border_right = TRUE) %>%
  kableExtra::column_spec(4, border_right = TRUE) %>%
  kableExtra::footnote(
    general = "Statistical Significance: *** p < 0.001, ** p < 0.01, * p < 0.05, . p < 0.10. Confidence intervals and p-values based on 1000 bootstrapped resamples.",
    threeparttable = TRUE) %>%
  c() %>%
  # MUST MANUALLY ADD \textless by find and replace. Sorry!
  str_replace_all(pattern = "p < ", replacement = "p textless ") %>%
  cat(sep = "\n", fill = FALSE) 

```

### Plot

```{r}
library(tidyverse)
library(ggallin)
library(fontawesome)
library(rsvg)
library(magick)
library(ggridges)
library(ggnewscale)

mycolors <- c("#648FFF", "#785EF0","#DC267F","#FE6100","#FFB000")

myviz <- read_rds("model/period_att2_14.rds") %>%
  filter(time == "3-14") %>%
  mutate(treatment = type %>% recode_factor(
    "hard" = "Hard", "soft" = "Soft", "local" = "Local", "state" = "State")) %>%
  # Create p-values for transparency
  mutate(stars = gtools::stars.pval(p_value) %>% na_if(""),
         sig = case_when(
           is.na(stars) ~ "Insignificant",
           stars == "." ~ "p < 0.10",
           stars == "*" ~ "p < 0.05",
           stars ==   "**" ~ "p < 0.01",
           stars == "***" ~ "p < 0.001") %>%
           factor(levels = c("Insignificant", "p < 0.10", "p < 0.05",
                             "p < 0.01", "p < 0.001") %>% rev()),
         p_value = if_else(p_value < 0.001, "p < 0.001", paste("p = ", round(p_value, 3), sep = "")))  %>%
  ungroup() %>%
  mutate(group = paste(outcome, treatment)) %>%
  select(treatment, outcome, estimate, p_value, stars, sig,  group)




m <- read_rds("model/period_att2_14_boot.rds") %>%
  filter(id != 0) %>%
  filter(time == "3-14") %>%
  mutate(treatment = type %>% recode_factor(
    "hard" = "Hard", "soft" = "Soft", "local" = "Local", "state" = "State")) %>%
  select(outcome, treatment, boot = att.avg) %>%
  left_join(by = c("outcome", "treatment"),
            y = myviz %>% select(outcome, treatment, estimate)) %>%
  # Calculate 95% confidence intervals
  group_by(treatment, outcome) %>%
  mutate(ci_lower = quantile(boot, probs = 0.025),
         ci_upper = quantile(boot, probs = 0.975)) %>%
  ungroup()


d <- m %>%
  group_by(outcome) %>%
  mutate(n = round(max(boot) - min(boot), 0)*50) %>%
  group_by(treatment, outcome) %>%
  summarize(
    density(boot, n = n) %>%
      with(data.frame(x = x, y = y)),
    estimate = unique(estimate),
    diff = abs(estimate - x),
    xmin = x - 0.1,
    xmax = x + 0.1,
    ci_lower = unique(ci_lower),
    ci_upper = unique(ci_upper)
  ) %>%
  ungroup() %>%
  # Filter to within the 95% confidence interval
  group_by(outcome, treatment) %>%
  filter(x > ci_lower,
         x < ci_upper) %>%
  ungroup() %>%
  
  # Rescale max height of density plot, for each dependent variable (panel)
  # to avoid having density curves that are too pointy to see easily
  group_by(outcome) %>%
  # Adjust spacing
  mutate(ymin = 0,
         spacing = 0.50) %>%
  mutate_at(vars(ymin, y),
            list(~case_when(
              treatment == "Hard" ~ . + spacing*4,
              treatment == "Soft" ~ . + spacing*3,
              treatment == "Local" ~ . + spacing*2,
              treatment == "State" ~ . + spacing))) %>%
  # Now, just stretch each distribution ever so slightly upwards
  group_by(treatment, outcome) %>%
  mutate(y = scales::rescale(y, to = c(min(ymin), max(y) + 0.75)))

# Get me the middle and top of each spot
mypoints <- d %>%
  group_by(outcome, treatment) %>%
  summarize(ymin = min(ymin),
            ymax = max(y),
            y = unique(treatment) %>% dplyr::recode(
              "Hard" = 2.45, "Soft" = 1.8, 
              "Local" = 1.15, "State" = 0.5)) %>%
  left_join(by = c("outcome", "treatment"),
            y = myviz) %>%
  mutate(image = case_when(
    estimate > 0 ~ "<img src='fa/plus-circle.png' height='25' width='25'>",
    estimate < 0 ~ "<img src='fa/minus-circle.png' height='25' width='25'>")) %>%
  mutate(adjust = if_else(outcome == "net_migration_in" & treatment == "Soft", TRUE, FALSE)) %>%
  mutate(label = paste(if_else(estimate > 0, "+", "-"), round(estimate, 2), stars, sep = ""))

mylines <- mypoints %>%
  group_by(outcome) %>%
  summarize(ymin = min(ymin),
            ymax = max(ymax))
```


```{r}
g1 <- ggplot() +
  #############
  # Split into facets
  #############
  facet_grid(
    cols = vars(outcome), scales = "free",
    labeller = labeller(
      outcome = c(
        "net_income_in" = "<b>Net Income Inflow</b><br><i>$ per 100 residents</i>",
        "net_migration_in" = "<b>Net In-Migration</b><br><i>per 1000 residents</i>"))) +
  # Add a line indicating zero
  geom_linerange(data = mylines,
                 mapping = aes(x = 0, ymin = ymin, ymax = ymax), 
                 linetype = "solid", color = "grey", size = 2.5, alpha = 0.5) +

  ######### 
 # Hard
  ######### 
  geom_linerange(
    data = d %>% filter(treatment == "Hard"),
    mapping = aes(x = x, 
                  ymin = ymin, ymax = y, color = diff, group = treatment),
    alpha = 0.8) +
  geom_linerange(
    data = mypoints %>% filter(treatment == "Hard"),
    mapping = aes(x = estimate, ymin = ymin, ymax = ymax - 0.05, 
                  group = treatment), 
    size = 1, alpha = 1,  color = "#648FFF") +
  geom_richtext(
    data = mypoints %>% filter(treatment == "Hard"),
    mapping = aes(x = estimate, y = (ymax - ymin) / 2 + ymin, 
                  label = image, group = treatment), fill = NA, label.color = NA) +
  geom_line(
    data = d %>% filter(treatment == "Hard"),
    mapping = aes(x =x , y =y, group = treatment)) +
  scale_color_gradient(low = "#648FFF", high = "white", 
                       trans = ggallin::ssqrt_trans, guide = "none")  +

  new_scale("color") +
  ######### 
  # Soft
  ##########
  geom_linerange(
    data = d %>% filter(treatment == "Soft"),
    mapping = aes(x = x, 
                  ymin = ymin, ymax = y, color = diff, group = treatment),
    alpha = 0.8) +
  geom_linerange(
    data = mypoints %>% filter(treatment == "Soft") %>% filter(outcome == "net_income_in"),
    mapping = aes(x = estimate, ymin = ymin, ymax = ymax - 0.1, 
                  group = treatment), 
    size = 1, alpha = 1,  color = "#DC267F") +
 geom_linerange(
    data = mypoints %>% filter(treatment == "Soft") %>% filter(outcome == "net_migration_in"),
    mapping = aes(x = estimate, ymin = ymin, ymax = ymax - (ymax - ymin)/1.5, 
                  group = treatment), 
    size = 1, alpha = 1,  color = "#DC267F") +
 
  geom_richtext(
    data = mypoints %>% filter(treatment == "Soft"),
    mapping = aes(x = estimate, y = (ymax - ymin) / 4 + ymin, 
                  label = image, group = treatment), fill = NA, label.color = NA) +
  geom_line(
    data = d %>% filter(treatment == "Soft"),
    mapping = aes(x =x , y =y, group = treatment) ) +
  scale_color_gradient(low = "#DC267F", high = "white", 
                       trans = ggallin::ssqrt_trans, guide = "none")  +
  new_scale("color") +
  ######### 
  # Local
  ######### 
  geom_linerange(
    data = d %>% filter(treatment == "Local"),
    mapping = aes(x = x, 
                  ymin = ymin, ymax = y, color = diff, group = treatment),
    alpha = 0.8) +
   geom_linerange(
    data = mypoints %>% filter(treatment == "Local"),
    mapping = aes(x = estimate, ymin = ymin, ymax = ymax - 0.05, 
                  group = treatment), 
    size = 1, alpha = 1,  color = "#785EF0") +
 
  geom_richtext(
    data = mypoints %>% filter(treatment == "Local"),
    mapping = aes(x = estimate, y = (ymax - ymin) / 2 + ymin, 
                  label = image, group = treatment), fill = NA, label.color = NA) +
  geom_line(
    data = d %>% filter(treatment == "Local"),
    mapping = aes(x =x , y =y, group = treatment) ) +
  scale_color_gradient(low = "#785EF0", high = "white", 
                       trans = ggallin::ssqrt_trans, guide = "none")  +
  new_scale("color") +
  ######### 
  # State
  ######### 
  geom_linerange(
    data = d %>% filter(treatment == "State"),
    mapping = aes(x = x, 
                  ymin = ymin, ymax = y, color = diff, group = treatment),
    alpha = 0.8) +
   geom_linerange(
    data = mypoints %>% filter(treatment == "State"),
    mapping = aes(x = estimate, ymin = ymin, ymax = ymax - 0.05, 
                  group = treatment), 
    size = 1, alpha = 1,  color = "#FFB000") +
  geom_richtext(
    data = mypoints %>% filter(treatment == "State"),
    mapping = aes(x = estimate, y = (ymax - ymin) / 2 + ymin, 
                  label = image, group = treatment), fill = NA, label.color = NA) +
  geom_line(
    data = d %>% filter(treatment == "State"),
    mapping = aes(x =x , y =y, group = treatment) ) +
  scale_color_gradient(low = "#FFB000", high = "white", 
                       trans = ggallin::ssqrt_trans, guide = "none") +
  new_scale("color") +


  #############
  # AXES - Add some nice labels
  #############
   scale_y_continuous(
    breaks = c(0.5 + 0.7*3, 0.5 + 0.7*2, 0.5 + 0.7, 0.5),
    labels = c("<img src='fa/tools.png' alt='tools' width='25' height='25'>
               <br>Hard",
               "<img src='fa/people-arrows.png' alt='tools' width='25' height='25'>
               <br>Soft",
               "<img src='fa/house-user.png' alt='tools' width='25' height='25'>
               <br>Local",
               "<img src='fa/flag.png' alt='tools' width='25' height='25'>
               <br>State"))  +
  scale_x_continuous(
    breaks = c(-30, -25, -20, -15, -10, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 10, 15, 20, 25, 30)) +
  #############
  # themes
  #############
  theme(axis.text.y = ggtext::element_markdown(size = 12, hjust = 0.5),
        axis.ticks.y = element_blank(),
        axis.ticks.x = element_line(color = "grey", size = 0.2),
        strip.background = element_blank(),
        panel.background = element_blank(),
        strip.text.x = ggtext::element_markdown(size = 12, hjust = 0.5),
        panel.grid = element_blank(),
        plot.caption = ggtext::element_markdown(size = 11, hjust = 0),
        panel.spacing.x = unit(0.25, "cm"),
        plot.subtitle = ggtext::element_markdown(size = 14, hjust = 0.5),
        axis.line.x.bottom = element_line(color = "grey", size = 0.5)) +
  new_scale("color")  +
  #############
  # Coefficients
  #############
  shadowtext::geom_shadowtext(
    data = mypoints, 
    mapping = aes(x = estimate, y = (ymax - ymin)*4/5 + ymin, color = treatment, label = label),
    size = 5, bg.r = 0.15, bg.color = "white", hjust = 1) +
  shadowtext::geom_shadowtext(
    data = mypoints, 
    mapping = aes(x = estimate, y = (ymax - ymin)*4/5 + ymin, color = treatment, label = label),
    size = 5, bg.r = 0.05, hjust = 1) +
  #shadowtext::geom_shadowtext(
  #  data = mypoints %>%
  #    filter(adjust == FALSE), 
  #   mapping = aes(x = estimate, y = (ymax - ymin)/8 + ymin, color = treatment,
  #                  label = p_value), 
  #  color = "black", size = 3, bg.r = 0.1, bg.color = "white", hjust = 0) +
  # Adjust p-value positions that don't easily fit
  # shadowtext::geom_shadowtext(
  #  data = mypoints %>%
  #    filter(adjust == TRUE), 
  #    mapping = aes(x = estimate, y = (ymax - ymin)/8 + ymin, color = treatment,
  #                  label = p_value), 
  #  color = "black", size = 3, bg.r = 0.1, bg.color = "white", hjust = 0, nudge_x = 1) +

  scale_color_manual(
    breaks = c("Hard", "Soft", "Local", "State"),
    values = c("#648FFF", "#DC267F", "#785EF0", "#FFB000"),
    guide = "none")  +
  
    #############
  # Labels  
  #############
  labs(subtitle = "Average Treatment Effects of Policy Toolkits Long-Term (2007-2018)",
       y = NULL, x = "Average Treatment Effects (95% Bootstrapped Confidence Intervals)",
       caption = "<b>Statistical Significance</b>: *** p < 0.001, ** p < 0.01, * p < 0.05, . p < 0.10. Based on 1000 Bootstrapped Resamples.") +
  coord_cartesian(clip = "on")



ggsave(g1, filename = "viz/figure_4.png", dpi = 500, width = 9, height = 5)

rm(list=ls())
```



## Figure 6

#### Dataset Building

```{r}

lines <- read_rds("model/long.rds") %>% 
  select(year = time, type = group, outcome, treatment = type, y, id) %>%
  # Get observed means for treatment and counterfactual
  filter(id == 0) %>%
  mutate(type = type %>% recode("yct" = "counterfactual", "ytr" = "treatment"),
         color = case_when(
    type == "counterfactual" ~ "counterfactual",
    type != "counterfactual" ~ treatment))

# Which groups got the treatment? (1/0)
myatt <- read_rds("model/eff_bands.rds") %>%
  select(year = time, outcome, treatment = type, y = estimate) %>%
  mutate(color = "effect",
           type = "effect")


myeff <- read_rds("model/eff.rds") %>%
  # Get bootstrapped
  filter(id != 0) %>%
  select(outcome, treatment = type, replicate = id, year = time, y = eff) %>%
  # Now to get the color scale
  group_by(year, outcome, treatment) %>%
  arrange(y) %>%
  # Assign the highest value to the row of the next highest value
  mutate(ymax = lag(y, 1)) %>%
  # Then get rid of any now-useless rows lacking 2 complete values
  filter(!is.na(ymax)) %>%
  mutate(year_next = year + 1) %>%
  # arrange, annualy, from highest to lowest
  arrange(year, desc(ymax)) %>%
  group_by(year, outcome, treatment) %>%
  mutate(rank = as.numeric(1:n()),
         fill = (abs(rank - 500) + 500) / 1000) %>%
  ungroup() %>%
  # Filter in to just the 95% confidence interval
  filter(fill <= 0.95) %>%
  select(year, outcome, treatment, y, ymax, rank, fill) %>%
  # Add attributes
  rename(type = rank) %>% mutate(color = NA, type = as.character(type))

bind_rows(lines, myatt, myeff) %>%
  mutate(panel = paste(
    outcome,
    case_when(
      type %in% c(1:1000) ~ "effect",
      type == "effect" ~ "effect",
      type %in% c("counterfactual", "treatment") ~ "lines",
      TRUE ~ NA_character_),
    sep = "_"),
    panel = factor(panel, levels = c(
      "net_income_in_effect",
      "net_income_in_lines",
      "net_migration_in_effect",
      "net_migration_in_lines"))) %>% 
  mutate(color = factor(color, levels = c(
    "effect", "counterfactual", "hard", "soft", "local", "state"))) %>%
  mutate(treatment = factor(treatment, levels = c("hard", "soft", "local", "state"))) %>%
  saveRDS("model/manyplot.rds")

rm(list = ls())
```

#### Extra Icons

```{r}
library(magick)

# Hard
myicon <- image_read("fa/tools.png")

mycircle <- image_read("fa/minus-circle-black.png") %>%
  image_scale("50") 

magick::image_blank(width = 150, height = 100, color = "none") %>%
  image_background(color = "white") %>%
  image_composite(myicon, operator = "atop", offset = "+0+0") %>%
  image_composite(mycircle, offset = "+100+50") %>%
  image_transparent(color = "white") %>%
  image_write("fa/tools-result.png", format = "png", quality = 100)


# Soft
myicon <- image_read("fa/people-arrows.png")

mycircle <- image_read("fa/plus-circle-black.png") %>%
  image_scale("50") 

magick::image_blank(width = 150, height = 100, color = "none") %>%
  image_background(color = "white") %>%
  image_composite(myicon, operator = "atop", offset = "+0+0") %>%
  image_composite(mycircle, offset = "+100+50") %>%
  image_transparent(color = "white") %>%
  image_write("fa/people-arrows-result.png", format = "png", quality = 100)


# Local
myicon <- image_read("fa/house-user.png")

mycircle <- image_read("fa/plus-circle-black.png") %>%
  image_scale("50") 

magick::image_blank(width = 150, height = 100, color = "none") %>%
  image_background(color = "white") %>%
  image_composite(myicon, operator = "atop", offset = "+0+0") %>%
  image_composite(mycircle, offset = "+100+50") %>%
  image_transparent(color = "white") %>%
  image_write("fa/house-user-result.png", format = "png", quality = 100)

# State
myicon <- image_read("fa/flag.png")

mycircle <- image_read("fa/minus-circle-black.png") %>%
  image_scale("50") 

magick::image_blank(width = 150, height = 100, color = "none") %>%
  image_background(color = "white") %>%
  image_composite(myicon, operator = "atop", offset = "+0+0") %>%
  image_composite(mycircle, offset = "+100+50") %>%
  image_transparent(color = "white") %>%
  image_write("fa/flag-result.png", format = "png", quality = 100)

fa_png(name = "fas fa-question-circle", 
       file = "fa/question-circle.png",
       fill = "black", fill_opacity = 0.75)
```


#### Visual Data

```{r}
ci <- read_rds("model/manyplot.rds") %>%
  filter(type %in% 1:1000) 
  # Test
  #filter(type %in% c(75:100))


avg <- read_rds("model/manyplot.rds") %>%
  filter(type == "effect") 

l <- read_rds("model/manyplot.rds") %>%
  filter(type %in% c("counterfactual", "treatment")) 

myperiod <- read_rds("model/manyplot.rds")  %>%
  group_by(panel) %>%
  summarize(
    treatment = unique(treatment),
    ymin = min(y),
    ymax = max(y),
    xmin = 2005, xmax = 2018) %>%
  mutate(color = case_when(
    str_detect(panel, "lines") ~ "lines",
    str_detect(panel, "effect") ~ "effect") %>%
      factor(levels = c("lines", "effect")))

myaxis <- read_rds("model/manyplot.rds") %>%
  group_by(panel) %>%
  summarize(x = c(1997, 2018),
            y = c(0, 0)) %>%
  mutate(color = case_when(
    str_detect(panel, "lines") ~ "lines",
    str_detect(panel, "effect") ~ "effect") %>%
      factor(levels = c("lines", "effect")))


myimages <- myperiod %>% 
  filter(treatment %in% c("soft", "local")) %>%
  filter(color == "effect") %>%
  mutate(x = 2011, y = 0) %>%
  mutate(image = "<img src='fa/plus-circle-black.png' height='25' width='25'>")


myviz <- read_rds("model/period_att2_14.rds") %>%
  mutate(time = time %>% str_remove(".*-") %>% as.numeric()) %>%
  filter(time == max(as.numeric(time))) %>%
  rename(year = time, treatment = type) %>%
  # Order treatments
  mutate(treatment = treatment %>% recode_factor(
    "hard" = "Hard", "soft" = "Soft", "local" = "Local", "state" = "State")) %>%
  # Extract beta coefficient
  mutate(stars = gtools::stars.pval(p_value)) %>%
  mutate(p_value = paste("p = ", round(p_value, 3)))  %>%
  ungroup() %>%
  select(outcome, treatment, estimate, p_value, stars) %>%
  mutate(panel = paste(outcome, "effect", sep = "_"),
         panel = factor(panel, levels = c(
           "net_income_in_effect",
           "net_income_in_lines",
           "net_migration_in_effect",
           "net_migration_in_lines"))) %>%
  mutate(treatment = factor(tolower(treatment), levels = c("hard", "soft", "local", "state"))) %>%
  mutate(estimate = if_else(str_detect(estimate, "[-]"), 
                            true = paste(round(estimate, 2), stars),  
                            false = paste("+", round(estimate, 2), stars, sep = "")))
```

#### Plotting

```{r}
library(ggtext)
library(fontawesome)
library(ggallin)
library(ggpubr)
library(shadowtext)
library(ggnewscale)

g1 <- ggplot() +
   # Add theme
  theme_classic(base_size = 14) +
  theme(legend.position = "bottom",
        legend.box.margin = margin(0,0,0,0,"cm"),
        legend.box = "horizontal",
        legend.margin = margin(0,0,0,0, "cm"),
        legend.text = ggtext::element_markdown(size = 12, hjust = 0.5),

        strip.background = element_blank(),
        strip.text.y = ggtext::element_markdown(size = 12, hjust = 0, angle = 0),
        strip.text.x = ggtext::element_markdown(size = 13, hjust = 0.5),
        
        axis.title.y = ggtext::element_markdown(size = 12, hjust = 0.5),
        axis.text.x = element_text(size = 10),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks.y = element_line(size = 0.5, color = "grey"),
        axis.ticks.x = element_blank(),        
        
        panel.border = element_blank(),

        plot.caption = ggtext::element_markdown(size = 11, hjust = 0),
        plot.subtitle = ggtext::element_markdown(size = 14, hjust = 0.5),
        
        panel.spacing = unit(0.25, "cm")) +
  labs(
    subtitle = "Average Treatment Effects Over Time by Policy Toolkit",
    y = "**Recovery Indicator**<br>*(on a Bi-directional Log-Scale)*",
    x = NULL, fill = NULL, color = NULL,linetype = NULL,
    caption = NULL) +
  # Facet grid
  facet_grid(
    cols = vars(treatment), rows = vars(panel), scales = "free",
    labeller = labeller(
      panel = c(
        "net_income_in_effect" = "**Effect on<br>Income**<br>
            <img src='fa/check.png' height='25' width='25'>",
        "net_income_in_lines" = "<span style='color:#7F7D9C'>**Net<br>Income<br>Inflow<br>Compared**
            <br><i>$ per 100<br>residents</i></span>",
        "net_migration_in_effect" = "**Effect on<br>Inmigration**<br>
            <img src='fa/check.png' height='25' width='25'>",
        "net_migration_in_lines" = "<span style='color:#7F7D9C'>**Net<br>Inmigration<br>Compared**
            <br>*per 1000<br>residents*</span>"),
      treatment = c(
        "hard" = "<img src='fa/tools-result.png' alt='tools' width='75' height='50'>
          <br>*Hard Policy*",
        "soft" = "<img src='fa/people-arrows-result.png' alt='tools' width='75' height='50'>
          <br>*Soft Policy*",
        "local" = "<img src='fa/house-user-result.png' alt='tools' width='75' height='50'>
          <br>*Local Policy*",
        "state" = "<img src='fa/flag-result.png' alt='tools' width='75' height='50'>
          <br>
          *State Policy*"))) +
  scale_y_continuous(
    breaks = c(300, 100, 30, 10, 3, 0, -3, -10, -30, -100, -300, -1000),
    trans = ggallin::pseudolog10_trans, expand = expansion(add = 0.1)) +
  scale_x_continuous(breaks = seq(from = 1997, to = 2018, by = 6),
                     expand = expansion(add = 0)) +
  ###############################################
  # Identify the post-treatment period 
  ###############################################
  geom_rect(
    data = myperiod, 
    mapping = aes(xmin = 2005, xmax = xmax, ymin = ymin, ymax = ymax, 
                  fill = "early"), alpha = 0.25) + 
  geom_rect(
    data = myperiod, 
    mapping = aes(xmin = 2007, xmax = xmax, ymin = ymin, ymax = ymax, 
                  fill = "late"), alpha = 0.25) + 
  scale_fill_manual(
    values = c("darkgrey", "black"), name = NULL, 
    breaks = c("early", "late"), 
    labels = c("Short-Term<br>2005-2006", "Long-Term<br>2007-2018"), guide = "none"
    ) +
  new_scale("fill") +
  ###############################################
  # Plot the zero intercept 
  ###############################################
  geom_line(
    data = myaxis, mapping = aes(x = x, y = y, color = color), 
    size = 1, alpha = 0.5)   +
  scale_color_manual(
    values = c("grey", "black"), breaks = c("lines", "effect"),
    guide = "none")  +

  ###############################################
  # Plot border 
  ############################################### 
  new_scale("color") +
  geom_rect(
    data = myperiod, 
    mapping = aes(xmin = 1996, xmax = xmax, ymin = ymin, ymax = ymax, 
                  color = color),fill = NA, size = 1.5) + 
  scale_color_manual(
    values = c("black", "grey"), breaks = c("effect", "lines"),
    guide = "none")  +


  ###############################################
  # Add confidence intervals
  ###############################################
  # Hard
  geom_ribbon(
    data = ci %>%
      filter(treatment == "hard"),
    mapping = aes(x = year, ymin = y, ymax = ymax, fill = fill, group = type)) +
  scale_fill_gradient(low = "white", high = "#648FFF", guide = "none") +
  # Soft
  new_scale("fill") +
  geom_ribbon(
    data = ci %>%
      filter(treatment == "soft"),
    mapping = aes(x = year, ymin = y, ymax = ymax, fill = fill, group = type)) +
  scale_fill_gradient(low = "white", high = "#DC267F", guide = "none") +
  # Local
  new_scale("fill") +
  geom_ribbon(
    data = ci %>%
      filter(treatment == "local"),
    mapping = aes(x = year, ymin = y, ymax = ymax, fill = fill, group = type)) +
  scale_fill_gradient(low = "white", high = "#785EF0", guide = "none") +
  # Soft
  new_scale("fill") +
  geom_ribbon(
    data = ci %>%
      filter(treatment == "state"),
    mapping = aes(x = year, ymin = y, ymax = ymax, fill = fill, group = type)) +
  scale_fill_gradient(low = "white", high = "#FFB000", guide = "none") +
  new_scale("fill") +
  
  ###############################################
  # Add white outlines for treatment/counterfactual lines
  ###############################################
  new_scale("color") +
  geom_line(
    data = l,
    mapping = aes(x = year, y = y, group = type, size = type),
    color = "white")  +
  # Add sizing for lines
  scale_size_manual(
    breaks = c("treatment", "counterfactual"),
    values = c(3, 2), guide = "none") +
  new_scale("size") +
  # Add actual lines
  geom_line(
    data = l,
    mapping = aes(x = year, y = y, color = color, group = type, size = type, linetype = type))  +
  # Add sizing for actual lines
  scale_size_manual(
    breaks = c("treatment", "counterfactual"),
    values = c(2, 0.75), guide = "none") +
  ###############################################
  # Add Average Annual Treatment Effect Lines
  ###############################################
  geom_line(
    data = avg,
    mapping = aes(x = year, y = y, group = type, color = color, linetype = type),
    size = 1.0)  +
  # Add colors
  scale_color_manual(
    breaks = c("effect", "counterfactual", "hard", "soft", "local", "state"),
    values = c("black", "darkgrey", "#648FFF", "#DC267F", "#785EF0", "#FFB000"), 
    guide = "none") +
  # Add linetypes
  scale_linetype_manual(
    breaks = c("effect", "treatment", "counterfactual"),
    labels = c("**Average Effect**<br>(*Solid Black*)",
               "**Treatment Group**<br>(*Colored, eg. <span style='color:#648FFF'>Blue</span>*)",
               "**Counterfactual**<br>(*Dashed*)"),
    values = c("solid", "solid", "dashed"),
    guide = guide_legend(
      order = 2, 
      override.aes = list(
        size = c(3, 3, 3),
        color = c("black", "#648FFF", "grey")))) +
  #################################
  #############
  # Coefficients
  #############
  new_scale("color") +
  shadowtext::geom_shadowtext(
    data = myviz %>% filter(treatment != "state"),
    mapping = aes(x = 2017, y = -10, color = treatment, label = estimate),
    size = 5.5, bg.r = 0.1, hjust = 1, bg.color = "white", fontface = "bold") +
  shadowtext::geom_shadowtext(
    data = myviz %>% filter(treatment == "state"),
    mapping = aes(x = 2017, y = -10, color = treatment, label = estimate),
    size = 5.5, bg.r = 0.1, hjust = 1, bg.color = "black", fontface = "bold") +
  shadowtext::geom_shadowtext(
    data = myviz,
      mapping = aes(x = 2008, y = -30, label = p_value),
    color = "black", size = 3, bg.r = 0.1, bg.color = "white", hjust = 0) +
  scale_color_manual(
    breaks = c("hard", "soft", "local", "state"),
    values = c("#648FFF", "#DC267F", "#785EF0", "#FFB000"),
    guide = "none") +
   guides(color = "none", linetype = "none", fill = "none")

ggsave(g1, filename = "viz/figure_5.png", dpi = 500, width = 8, height = 8.5)
```

## Figure 7

Goal: First, visualize case studies, showing difference for each.
Show treated vs. counterfactual, with residual lines shaded by significance, 
mentioning average treatment effect

```{r}
load("model/metadata.RData")

lines <- read_rds("model/long_cases.rds") %>%
  filter(id == 0) %>%
  mutate(type = group) %>%
    mutate(color = case_when(
    type == "yct" ~ "counterfactual",
    type != "yct" ~ treatment)) %>%
    # Break into parts
  mutate(group = paste(name, treatment) %>% str_replace_all("[.]| ", "_") %>%
           str_replace_all(pattern = "__", replacement = "_") %>%
           tolower())  %>%
    mutate(group = factor(group, levels= c(
      "st_bernard_soft_policy", "st_bernard_local_policy", "st_bernard_hard_policy",
       "washington_soft_policy", "washington_local_policy","plaquemines_hard_policy")))  %>%
  filter(str_detect(group, "washington", negate= TRUE))


rescum <- read_rds("model/long_cases.rds") %>%
  pivot_wider(id_cols = c(name, outcome, treatment, id, time), names_from = group, values_from = y) %>%
  mutate(eff = ytr - yct) %>%
  filter(time %in% as.character(2005:2018)) %>%
  group_by(name, outcome, treatment, id) %>%
  summarize(eff = mean(eff)) %>%
  get_stat(qi = "eff")

reslong <- read_rds("model/long_cases.rds") %>%
  pivot_wider(id_cols = c(name, outcome, treatment, id, time), names_from = group, values_from = y) %>%
  mutate(eff = ytr - yct) %>%
  filter(time %in% as.character(2007:2018)) %>%
  group_by(name, outcome, treatment, id) %>%
  summarize(eff = mean(eff)) %>%
  get_stat(qi = "eff")

res <- bind_rows(reslong %>% mutate(range = "2007-2018"),
          rescum %>% mutate(range = "2005-2018")) %>%
    # Break into parts
  mutate(group = paste(name, treatment) %>% str_replace_all("[.]| ", "_") %>%
           str_replace_all(pattern = "__", replacement = "_") %>%
           tolower())  %>%
    mutate(group = factor(group, levels= c(
     
      "st_bernard_soft_policy", "st_bernard_local_policy", "st_bernard_hard_policy",
       "washington_soft_policy", "washington_local_policy","plaquemines_hard_policy"))) %>%
    filter(str_detect(group, "washington", negate= TRUE))


remove(rescum, reslong)

resavg <- res %>%
  mutate(stars = gtools::stars.pval(p_value),
         p_value = if_else(p_value < 0.001, 
                           true = "p < 0.001", 
                           false = paste("p = ", round(p_value, 3), sep = ""))) %>%
  mutate(label = paste(if_else(estimate > 0, "+", ""), round(estimate, 2), stars, sep = "")) %>%
  mutate(case = case_when(
    str_detect(group, "bernard") ~ "Emblematic Case",
    TRUE ~ "Divergent Case")) 

# Make a lovely background
mytiles <- lines %>%
  group_by(outcome) %>%
  summarize(ymax = max(y),
            ymin = min(y)) %>%
  group_by(outcome) %>%
  summarize(ymax = seq(from = ymin, to = ymax, length.out = 1000),
            ymin = lag(ymax, n = 1),
            ymin = case_when(
              is.na(ymin) & ymax < 0 ~ -Inf,
              is.na(ymin) & ymax > 0 ~ Inf,
              TRUE ~ ymin),
            y = (ymin + ymax) / 2) %>%
  filter(!is.infinite(ymin))
  lines$outcome %>% unique()
myblocks <- lines %>%
  group_by(outcome) %>%
  summarize(
    ymax = max(y),
    ymin = min(y),
    xmin = 1997,
    xmax = 2018,
    x = 2011,
    group = unique(group)) %>%
  ungroup() %>%
  mutate(
    selection = case_when(
      str_detect(group, "bernard") ~ "plus-white",
      str_detect(group, "plaq") & outcome == "net_migration_in" ~ "minus-black",
      str_detect(group, "plaq") & outcome == "net_income_in" ~ "plus-white",      
      TRUE ~ "plus-black"),
    y = case_when(
      selection == "plus-black" ~ 5,
      selection == "plus-white" ~ 5,
      selection == "minus-black" ~ -100),
    image = case_when(
      selection ==  "plus-black" ~ "<img src='fa/plus-circle-black.png' height='25' width='25'>",
      selection == "plus-white" ~ "<img src='fa/plus-circle-white.png' height='25' width='25'>",
      selection == "minus-black" ~ "<img src='fa/minus-circle-black.png' height='25' width='25'>")) %>%
  mutate(case = case_when(
    str_detect(group, "bernard") ~ "Emblematic Case",
    TRUE ~ "Divergent Case")) 

myperiod <- lines %>%
  group_by(outcome) %>%
  summarize(ymin = min(y),
            ymax = max(y),
            xmin = 2005, xmax = 2018)

myaxis <- data.frame(x = c(1997, 2018), y = c(0, 0))


```

```{r}
library(ggnewscale)
library(ggtext)
library(shadowtext)
library(ggallin)

g1 <- ggplot(data = lines) +
  facet_grid(
    cols = vars(group), 
    rows = vars(outcome), scales = "free",
    labeller = labeller(
      outcome = c(
        "net_income_in" = "<b>Net<br>Income<br>Inflow</b><br><i>$ per 100<br>residents</i>",
        "net_migration_in" = "<b>Net<br>Inmigration</b><br><i>per 1000<br>residents</i>"),
      group = c(
        "washington_soft_policy" = "**Washington**
          <br>
          <img src='fa/people-arrows.png' alt='tools' width='25' height='25'>
          <br>
          *Soft Policy*",
        
        "washington_local_policy" = "**Washington**
          <br>
          <img src='fa/house-user.png' alt='tools' width='25' height='25'>
          <br>
          *Local Policy*",
        
        "plaquemines_hard_policy" = "**Plaquemines**<br>
          <img src='fa/tools.png' alt='tools' width='25' height='25'>
          <br>*Hard Policy*",
        
        "st_bernard_soft_policy" = "**St. Bernard**
          <br>
          <img src='fa/people-arrows.png' alt='tools' width='25' height='25'>
          <br>
          *Soft Policy*",
        
        "st_bernard_local_policy" = "**St. Bernard**
          <br>
          <img src='fa/house-user.png' alt='tools' width='25' height='25'>
          <br>
          *Local Policy*",
        
        "st_bernard_hard_policy" = "**St. Bernard**
          <br>
          <img src='fa/tools.png' alt='tools' width='25' height='25'>
          <br>
          *Hard Policy*"))) +
  
  scale_y_continuous(
    breaks = c(300, 100, 30, 10, 3, 0, -3, -10, -30, -100, -300, -1000),
    trans = ggallin::pseudolog10_trans, expand = expansion(add = 0.1)) +
  scale_x_continuous(breaks = seq(from = 1997, to = 2018, by = 6),
                     expand = expansion(add = 0)) +

  # Identify the post-treatment period
  new_scale("fill") +
  geom_rect(data = myperiod,
            mapping = aes(xmin = 2005, xmax = xmax, ymin = ymin, ymax = ymax,
                          fill = "Short-Term<br>(2005-2006)"),
            alpha = 0.75) +
  geom_rect(data = myperiod,
            mapping = aes(xmin = 2007, xmax = xmax, ymin = ymin, ymax = ymax,
                          fill = "Long-Term<br>(2007-2018)"),
            alpha = 0.50) +
  scale_fill_manual(
    breaks = c("Short-Term<br>(2005-2006)", "Long-Term<br>(2007-2018)"),
    values = c("grey","black"), name = NULL, 
    guide = guide_legend(order = 2, override.aes = list(alpha = 0.75))) +
  # Add a color scale background to highlight particularly negative effects
  new_scale("fill") +
  geom_rect(
    data = mytiles,
    mapping = aes(xmin = 1997, xmax = 2018, 
                  ymin = ymin, ymax = ymax, fill = y),
    alpha = 0.5) +
  scale_fill_gradient2(low = "#DC267F", high = "#648FFF", mid = "white", 
                       midpoint = 0, guide = "none",
                       trans = ggallin::pseudolog10_trans)  +

  ###### Add blocks
  ######
  ggtext::geom_richtext(
    data = myblocks,
    mapping = aes(x = x, y =y, label = image), fill = NA, label.color = NA) +
  
# Plot the zero intercept
  geom_line(
    data = myaxis,
    mapping = aes(x = x, y = y), color = "grey", size = 1, alpha = 0.25)  +
  
 # Display box
  new_scale("color") +
  geom_rect(
    data = myblocks, mapping = aes(xmin = xmin, xmax = xmax,
                                   ymax = ymax, ymin = ymin,
                                   color = case),
    fill = NA, size = 2) +
  scale_color_manual(
    breaks = c("Emblematic Case", "Divergent Case"),
    labels = c("Emblematic Case<br>('On-the-line')", "Divergent Case<br>('Off-the-Line')"),
    values = c("black", "grey"), name = NULL)  +

  # Overlay actual lines
  new_scale("color") +
  new_scale("size") +
    # Add treatment and counterfactual line background
  geom_line(
    data = lines,
    mapping = aes(x = time, y = y, group = paste(type,treatment), size = type),
    color = "white", linetype = "solid") +
  scale_size_manual(
    breaks = c("ytr", "yct"),
    labels = c("treatment", "counterfactual"),
    values = c(3, 2), guide = "none", name = NULL) +
  new_scale("size") + 
  geom_line(
    data = lines,
    mapping = aes(x = time, y = y, 
                  color = color, group = paste(type, treatment), 
                  linetype = type, size = type), alpha = 0.8) +
  scale_size_manual(
    breaks = c("ytr", "yct"),
    labels = c("treatment", "counterfactual"),
    values = c(2, 0.75), guide = "none") +
  # Add colors
  scale_color_manual(
    breaks = c("Counterfactual", "Hard Policy", "Soft Policy", "Local Policy", "State Policy"),
    labels = c("Counterfactual", "Hard", "Soft", "Local", "State"),
    values = c("darkgrey", "#648FFF", "#DC267F", "#785EF0", "#FFB000"),
    guide = "none") +
  # Add linetypes
  scale_linetype_manual(
    breaks = c("ytr", "yct"),
    labels = c("**Treated Parish**<br>(*Solid*)", 
               "**Counterfactual**<br>(*Dashed*)"),
    values = c("solid", "dashed"),
    guide = guide_legend(order = 1, override.aes = list(
      size = c(3, 2),
      color = c("black", "grey"),
      alpha = 1, stroke = c(1,5)))) +
  ########
  # Themes
  ########
  theme_classic(base_size = 14) +
  theme(legend.position = "bottom",
        legend.margin = margin(0,0,0,0,"cm"),
        legend.box.margin = margin(0,0,0,0,"cm"),
        legend.text = ggtext::element_markdown(size = 12, hjust = 0.5),
        strip.background = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks.y = element_line(size = 0.5, color = "grey"),
        axis.ticks.x = element_blank(),
        axis.text.x = element_text(size = 10),
        strip.text.x = ggtext::element_markdown(size = 12, hjust = 0.5),
        strip.text.y = ggtext::element_markdown(size = 12, hjust = 0, angle = 0),
        axis.title.y = ggtext::element_markdown(size = 12, hjust = 0.5),
        plot.caption = ggtext::element_markdown(size = 11, hjust = 0),
        plot.subtitle = ggtext::element_markdown(size = 14, hjust = 0.5)) +
  ################
  # Labels
  ################
  labs(
    subtitle = "Treatment Effects in Case Study Parishes",
    y = "**Recovery Indicator**<br>*on a Bi-directional Log-Scale*",
    x = NULL, fill = NULL, color = NULL,linetype = NULL,
    caption = "**Text** shows average treatment effect long-term (2007-2018). **Statistical Significance:** *** p < 0.001, ** p < 0.01, * p < 0.05, . p < 0.10.") + 
  coord_cartesian(clip = "on") +

  shadowtext::geom_shadowtext(
    data = resavg %>% filter(range == "2007-2018"),
    mapping = aes(x = 1997, y = -70, label = label),
    bg.color = "white", bg.r = 0.3, hjust = 0, fontface = "bold", 
    color = "black", size = 4) + 
  shadowtext::geom_shadowtext(
    data = resavg %>% filter(range == "2007-2018"),
    mapping = aes(x = 1999, y = -200, label = p_value),
    bg.color = "white", bg.r = 0.3, hjust = 0, fontface = "bold", 
    color = "black", size = 3)

ggsave(g1, filename = "viz/figure_6.png", dpi = 500, width = 10, height = 6)


```


### Reviewer Questions

```{r}
load("model/metadata.RData")
read_csv("dataset_1996_2018.csv") %>%
  filter(fips %in% ct) %>%
  filter(year == 2005) %>%
  group_by(damaged_percent = if_else(damaged_percent > 0, 1, 0 )) %>%
  count()

```

```{r}
load("model/metadata.RData")
read_csv("dataset_1996_2018.csv") %>%
  filter(fips %in% tr.disaster) %>%
  filter(year == 2005) %>%
  with(cor(soft, local))
```


```{r}
load("model/metadata.RData")
read_csv("dataset_1996_2018.csv") %>%
  filter(fips %in% tr.disaster) %>%
  filter(year == 2005) %>%
  with(cor(state, hard))
```
```{r}
dat <- bind_rows(
  read_csv("data/migration_total.csv") %>% 
  filter(fips %in% "22087") %>%
  select(fips, contains("indiv_exemptions_in_")) %>%
  pivot_longer(cols = -c(fips), names_to = "type", values_to = "value") %>%
  mutate(year = str_extract(type, "[0-9]{4}") %>% as.numeric(),
         type = "in"), 

read_csv("data/migration_total.csv") %>% 
  filter(fips %in% "22087") %>%
  select(fips, contains("indiv_exemptions_out_")) %>%
  pivot_longer(cols = -c(fips), names_to = "type", values_to = "value") %>%
  mutate(year = str_extract(type, "[0-9]{4}") %>% as.numeric(),
         type = "out") 
) %>%
  pivot_wider(id_cols = c(fips, year), names_from = type, values_from = value) %>%
  mutate(net = `in` - out)

dat %>%
  ggplot(mapping = aes(x = year, y = net)) +
  geom_line() +
  scale_y_continuous(trans= ggallin::pseudolog10_trans)
dat %>%
  filter(year == 2006)
# gaining just 171 new residents 



read_csv("dataset_1996_2018.csv") %>%
  filter(fips %in% "22087") %>%
  select(fips, year, pop, contains("net"))



read_csv("dataset_1996_2018.csv") %>%
  filter(fips %in% tr.disaster) %>%
  filter(year == 2005) %>%
  summarize(median = median(pop))

107.5138 * 1000
53.71316 * 1000
```
### St. Bernard Text


```{r}
load("model/metadata.RData")

# Get for each treatment the residual effects

# SOFT - big dip in first year, gets better after
data %>%
    filter(case %in% tr) %>%
    mutate(yct = ytr - !!sym(var)) %>%
    select(id, time, case, ytr, yct) %>%
    # For the treatment group, 
    # get the average treatment and counterfactual outcomes 
    # per time slice, per bootstrap
    group_by(id, time) %>%
    summarize(ytr = mean(ytr),
              yct = mean(yct)) %>%
    ungroup() %>%
    # And pivot into a long data.frame
    pivot_longer(cols = c(ytr, yct), names_to = "group", values_to = "y") 

tr.st <- "22087" 
# Get for migration
m <- read_rds("model/mres.rds") %>%
  filter(case %in% tr.st) %>%
  filter(time %in% 2007:2018) %>%
  mutate(effect = (d_soft + d_hard + d_local)) %>%
  ungroup() %>%
  group_by(id, time) %>%
  summarize(stat = mean(effect)) %>%
  ungroup() %>%
  group_by(id) %>%
  summarize(stat = mean(stat)) %>%
  ungroup() %>%
  get_stat(qi= "stat")

m %>% head()

# We project a total effect of 
rm(list = ls())

# Get average treatment effect 2007-2018 due to soft, local, and hard combined
dat <- read_rds("model/mres.rds") %>%
  split(.$id) %>%
  map_dfr(~tibble(
    case = .x$case,
    time = .x$time,
    residual = lm(formula = boot ~ disaster + state * time + factor(time) + factor(case) - 1, data = .x)$residual), 
    .id = "id") %>%
  ungroup() %>%
  filter(case %in% tr.st) %>%
  filter(time %in% 2007:2018) %>%
  group_by(id, time) %>%
  summarize(stat = mean(residual)) %>%
  ungroup() %>%
  group_by(id) %>%
  summarize(stat = mean(stat)) %>%
  ungroup() %>%
  get_stat(qi= "stat")

dat
```

```{r}

test <- read_rds("model/period_att2_14_boot.rds") %>%
  filter(time == "3-14") %>%
  pivot_wider(id_cols = c(id, outcome), names_from = type, values_from = att.avg) %>%
  mutate(fd = soft - hard,
         times = soft / hard) 
out <- bind_rows(
test %>% 
  group_by(outcome) %>%
  get_stat(qi = "fd")  %>% mutate(type=  "fd"),
test %>%
  group_by(outcome) %>%
  get_stat(qi = "times") %>% mutate(type=  "times")
)

#How much greater is the effect of soft policy than hard policy? Soft policy's effect is \$2.96 greater (95% CI: 1.82 - 4.11) for net-income rates and 3.57 (95% CI: 1.65 - 4.15) greater for net-in-migration rates.
```


